Skip to content

Commit e758d09

Browse files
[x2cpg] Update diffgraph and cfgcreator with support for new edges (#5890)
* Edge-first ControlStructureTraversal with fallback
* CfgCreator edge-first with order fallback
* Update CPG Version Number
* Incorporate review feedback
* Add new edges support to diffgraph
* Update Creator Base to auto-emit some edges where info is already provided
1 parent c5822bc commit e758d09

File tree

8 files changed

+644
-45
lines changed

8 files changed

+644
-45
lines changed

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name := "joern"
22
ThisBuild / organization := "io.joern"
33
ThisBuild / scalaVersion := "3.6.4"
44

5-
val cpgVersion = "1.7.60"
5+
val cpgVersion = "1.7.61"
66

77
lazy val joerncli = Projects.joerncli
88
lazy val querydb = Projects.querydb

joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/Ast.scala

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,33 @@ object Ast {
3232
ast.conditionEdges.foreach { edge =>
3333
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.CONDITION)
3434
}
35+
ast.trueBodyEdges.foreach { edge =>
36+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.TRUE_BODY)
37+
}
38+
ast.falseBodyEdges.foreach { edge =>
39+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.FALSE_BODY)
40+
}
41+
ast.doBodyEdges.foreach { edge =>
42+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.DO_BODY)
43+
}
44+
ast.tryBodyEdges.foreach { edge =>
45+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.TRY_BODY)
46+
}
47+
ast.catchBodyEdges.foreach { edge =>
48+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.CATCH_BODY)
49+
}
50+
ast.finallyBodyEdges.foreach { edge =>
51+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.FINALLY_BODY)
52+
}
53+
ast.forInitEdges.foreach { edge =>
54+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.FOR_INIT)
55+
}
56+
ast.forUpdateEdges.foreach { edge =>
57+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.FOR_UPDATE)
58+
}
59+
ast.forBodyEdges.foreach { edge =>
60+
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.FOR_BODY)
61+
}
3562
ast.receiverEdges.foreach { edge =>
3663
diffGraph.addEdge(edge.src, edge.dst, EdgeTypes.RECEIVER)
3764
}
@@ -90,6 +117,15 @@ case class Ast(
90117
], // technically this should be a Seq[AstNewNode], but we also use it for non-ast nodes like Binding...
91118
edges: collection.Seq[AstEdge] = Vector.empty,
92119
conditionEdges: collection.Seq[AstEdge] = Vector.empty,
120+
trueBodyEdges: collection.Seq[AstEdge] = Vector.empty,
121+
falseBodyEdges: collection.Seq[AstEdge] = Vector.empty,
122+
doBodyEdges: collection.Seq[AstEdge] = Vector.empty,
123+
tryBodyEdges: collection.Seq[AstEdge] = Vector.empty,
124+
catchBodyEdges: collection.Seq[AstEdge] = Vector.empty,
125+
finallyBodyEdges: collection.Seq[AstEdge] = Vector.empty,
126+
forInitEdges: collection.Seq[AstEdge] = Vector.empty,
127+
forUpdateEdges: collection.Seq[AstEdge] = Vector.empty,
128+
forBodyEdges: collection.Seq[AstEdge] = Vector.empty,
93129
refEdges: collection.Seq[AstEdge] = Vector.empty,
94130
bindsEdges: collection.Seq[AstEdge] = Vector.empty,
95131
receiverEdges: collection.Seq[AstEdge] = Vector.empty,
@@ -113,6 +149,15 @@ case class Ast(
113149
}
114150
),
115151
conditionEdges = conditionEdges ++ other.conditionEdges,
152+
trueBodyEdges = trueBodyEdges ++ other.trueBodyEdges,
153+
falseBodyEdges = falseBodyEdges ++ other.falseBodyEdges,
154+
doBodyEdges = doBodyEdges ++ other.doBodyEdges,
155+
tryBodyEdges = tryBodyEdges ++ other.tryBodyEdges,
156+
catchBodyEdges = catchBodyEdges ++ other.catchBodyEdges,
157+
finallyBodyEdges = finallyBodyEdges ++ other.finallyBodyEdges,
158+
forInitEdges = forInitEdges ++ other.forInitEdges,
159+
forUpdateEdges = forUpdateEdges ++ other.forUpdateEdges,
160+
forBodyEdges = forBodyEdges ++ other.forBodyEdges,
116161
argEdges = argEdges ++ other.argEdges,
117162
receiverEdges = receiverEdges ++ other.receiverEdges,
118163
refEdges = refEdges ++ other.refEdges,
@@ -126,6 +171,15 @@ case class Ast(
126171
nodes ++ other.nodes,
127172
edges = edges ++ other.edges,
128173
conditionEdges = conditionEdges ++ other.conditionEdges,
174+
trueBodyEdges = trueBodyEdges ++ other.trueBodyEdges,
175+
falseBodyEdges = falseBodyEdges ++ other.falseBodyEdges,
176+
doBodyEdges = doBodyEdges ++ other.doBodyEdges,
177+
tryBodyEdges = tryBodyEdges ++ other.tryBodyEdges,
178+
catchBodyEdges = catchBodyEdges ++ other.catchBodyEdges,
179+
finallyBodyEdges = finallyBodyEdges ++ other.finallyBodyEdges,
180+
forInitEdges = forInitEdges ++ other.forInitEdges,
181+
forUpdateEdges = forUpdateEdges ++ other.forUpdateEdges,
182+
forBodyEdges = forBodyEdges ++ other.forBodyEdges,
129183
argEdges = argEdges ++ other.argEdges,
130184
receiverEdges = receiverEdges ++ other.receiverEdges,
131185
refEdges = refEdges ++ other.refEdges,
@@ -154,6 +208,56 @@ case class Ast(
154208
this.copy(conditionEdges = conditionEdges ++ List(AstEdge(src, dst)))
155209
}
156210

211+
/** Returns a copy of this Ast with one additional TRUE_BODY edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any TRUE_BODY edges already recorded. This instance is not modified.
 */
def withTrueBodyEdge(src: NewNode, dst: NewNode): Ast = {
212+
Ast.neighbourValidation(src, dst, EdgeTypes.TRUE_BODY)
213+
this.copy(trueBodyEdges = trueBodyEdges ++ List(AstEdge(src, dst)))
214+
}
215+
216+
/** Returns a copy of this Ast with one additional FALSE_BODY edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any FALSE_BODY edges already recorded. This instance is not modified.
 */
def withFalseBodyEdge(src: NewNode, dst: NewNode): Ast = {
217+
Ast.neighbourValidation(src, dst, EdgeTypes.FALSE_BODY)
218+
this.copy(falseBodyEdges = falseBodyEdges ++ List(AstEdge(src, dst)))
219+
}
220+
221+
/** Returns a copy of this Ast with one additional DO_BODY edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any DO_BODY edges already recorded. This instance is not modified.
 */
def withDoBodyEdge(src: NewNode, dst: NewNode): Ast = {
222+
Ast.neighbourValidation(src, dst, EdgeTypes.DO_BODY)
223+
this.copy(doBodyEdges = doBodyEdges ++ List(AstEdge(src, dst)))
224+
}
225+
226+
/** Returns a copy of this Ast with one additional TRY_BODY edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any TRY_BODY edges already recorded. This instance is not modified.
 */
def withTryBodyEdge(src: NewNode, dst: NewNode): Ast = {
227+
Ast.neighbourValidation(src, dst, EdgeTypes.TRY_BODY)
228+
this.copy(tryBodyEdges = tryBodyEdges ++ List(AstEdge(src, dst)))
229+
}
230+
231+
/** Returns a copy of this Ast with one additional CATCH_BODY edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any CATCH_BODY edges already recorded. This instance is not modified.
 */
def withCatchBodyEdge(src: NewNode, dst: NewNode): Ast = {
232+
Ast.neighbourValidation(src, dst, EdgeTypes.CATCH_BODY)
233+
this.copy(catchBodyEdges = catchBodyEdges ++ List(AstEdge(src, dst)))
234+
}
235+
236+
/** Returns a copy of this Ast with one CATCH_BODY edge from `src` to every node in `dsts`.
 *
 * `Ast.neighbourValidation` is invoked for each (`src`, `dst`) pair before any edge is added (its checks are
 * defined outside this view). The new edges are appended after the existing CATCH_BODY edges, in the order of
 * `dsts`; an empty `dsts` adds no edges.
 */
def withCatchBodyEdges(src: NewNode, dsts: List[NewNode]): Ast = {
237+
dsts.foreach(dst => Ast.neighbourValidation(src, dst, EdgeTypes.CATCH_BODY))
238+
this.copy(catchBodyEdges = catchBodyEdges ++ dsts.map(AstEdge(src, _)))
239+
}
240+
241+
/** Returns a copy of this Ast with one additional FINALLY_BODY edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any FINALLY_BODY edges already recorded. This instance is not modified.
 */
def withFinallyBodyEdge(src: NewNode, dst: NewNode): Ast = {
242+
Ast.neighbourValidation(src, dst, EdgeTypes.FINALLY_BODY)
243+
this.copy(finallyBodyEdges = finallyBodyEdges ++ List(AstEdge(src, dst)))
244+
}
245+
246+
/** Returns a copy of this Ast with one additional FOR_INIT edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any FOR_INIT edges already recorded. This instance is not modified.
 */
def withForInitEdge(src: NewNode, dst: NewNode): Ast = {
247+
Ast.neighbourValidation(src, dst, EdgeTypes.FOR_INIT)
248+
this.copy(forInitEdges = forInitEdges ++ List(AstEdge(src, dst)))
249+
}
250+
251+
/** Returns a copy of this Ast with one additional FOR_UPDATE edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any FOR_UPDATE edges already recorded. This instance is not modified.
 */
def withForUpdateEdge(src: NewNode, dst: NewNode): Ast = {
252+
Ast.neighbourValidation(src, dst, EdgeTypes.FOR_UPDATE)
253+
this.copy(forUpdateEdges = forUpdateEdges ++ List(AstEdge(src, dst)))
254+
}
255+
256+
/** Returns a copy of this Ast with one additional FOR_BODY edge from `src` to `dst`.
 *
 * `Ast.neighbourValidation` is invoked for the pair first (its checks are defined outside this view); the new
 * edge is appended after any FOR_BODY edges already recorded. This instance is not modified.
 */
def withForBodyEdge(src: NewNode, dst: NewNode): Ast = {
257+
Ast.neighbourValidation(src, dst, EdgeTypes.FOR_BODY)
258+
this.copy(forBodyEdges = forBodyEdges ++ List(AstEdge(src, dst)))
259+
}
260+
157261
def withRefEdge(src: NewNode, dst: NewNode): Ast = {
158262
Ast.neighbourValidation(src, dst, EdgeTypes.REF)
159263
this.copy(refEdges = refEdges ++ List(AstEdge(src, dst)))
@@ -259,6 +363,15 @@ case class Ast(
259363

260364
val newArgEdges = argEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
261365
val newConditionEdges = conditionEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
366+
val newTrueBodyEdges = trueBodyEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
367+
val newFalseBodyEdges = falseBodyEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
368+
val newDoBodyEdges = doBodyEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
369+
val newTryBodyEdges = tryBodyEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
370+
val newCatchBodyEdges = catchBodyEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
371+
val newFinallyEdges = finallyBodyEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
372+
val newForInitEdges = forInitEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
373+
val newForUpdateEdges = forUpdateEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
374+
val newForBodyEdges = forBodyEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
262375
val newRefEdges = refEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
263376
val newBindsEdges = bindsEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
264377
val newReceiverEdges = receiverEdges.filter(_.src == node).map(x => AstEdge(newNode, newIfExists(x.dst)))
@@ -268,6 +381,15 @@ case class Ast(
268381
.copy(
269382
argEdges = newArgEdges,
270383
conditionEdges = newConditionEdges,
384+
trueBodyEdges = newTrueBodyEdges,
385+
falseBodyEdges = newFalseBodyEdges,
386+
doBodyEdges = newDoBodyEdges,
387+
tryBodyEdges = newTryBodyEdges,
388+
catchBodyEdges = newCatchBodyEdges,
389+
finallyBodyEdges = newFinallyEdges,
390+
forInitEdges = newForInitEdges,
391+
forUpdateEdges = newForUpdateEdges,
392+
forBodyEdges = newForBodyEdges,
271393
refEdges = newRefEdges,
272394
bindsEdges = newBindsEdges,
273395
receiverEdges = newReceiverEdges,

joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/AstCreatorBase.scala

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,11 @@ abstract class AstCreatorBase[Node, NodeProcessor](filename: String)(implicit wi
182182
if (code.isDefined) {
183183
doWhileNode = doWhileNode.code(code.get)
184184
}
185-
controlStructureAst(doWhileNode, condition, body, placeConditionLast = true)
185+
val astWithChildren = controlStructureAst(doWhileNode, condition, body, placeConditionLast = true)
186+
body.headOption.flatMap(_.root) match {
187+
case Some(doBodyRoot) => astWithChildren.withDoBodyEdge(doWhileNode, doBodyRoot)
188+
case None => astWithChildren
189+
}
186190
}
187191

188192
def forAst(
@@ -210,17 +214,35 @@ abstract class AstCreatorBase[Node, NodeProcessor](filename: String)(implicit wi
210214
updateAsts: Seq[Ast],
211215
bodyAsts: Seq[Ast]
212216
): Ast = {
213-
val lineNumber = forNode.lineNumber
214-
val numOfLocals = locals.size
217+
val lineNumber = forNode.lineNumber
218+
val numOfLocals = locals.size
219+
val initBlock = setOrderExplicitly(wrapMultipleInBlock(initAsts, lineNumber), numOfLocals + 1)
220+
val conditionBlock = setOrderExplicitly(wrapMultipleInBlock(conditionAsts, lineNumber), numOfLocals + 2)
221+
val updateBlock = setOrderExplicitly(wrapMultipleInBlock(updateAsts, lineNumber), numOfLocals + 3)
215222
// for the expected orders see CfgCreator.cfgForForStatement
216223
if (bodyAsts.nonEmpty) setOrderExplicitly(bodyAsts.head, numOfLocals + 4)
217-
Ast(forNode)
224+
val astWithChildren = Ast(forNode)
218225
.withChildren(locals)
219-
.withChild(setOrderExplicitly(wrapMultipleInBlock(initAsts, lineNumber), numOfLocals + 1))
220-
.withChild(setOrderExplicitly(wrapMultipleInBlock(conditionAsts, lineNumber), numOfLocals + 2))
221-
.withChild(setOrderExplicitly(wrapMultipleInBlock(updateAsts, lineNumber), numOfLocals + 3))
226+
.withChild(initBlock)
227+
.withChild(conditionBlock)
228+
.withChild(updateBlock)
222229
.withChildren(bodyAsts)
223230
.withConditionEdges(forNode, conditionAsts.flatMap(_.root).toList)
231+
232+
val astWithForInit = initBlock.root match {
233+
case Some(initRoot) => astWithChildren.withForInitEdge(forNode, initRoot)
234+
case None => astWithChildren
235+
}
236+
237+
val astWithForUpdate = updateBlock.root match {
238+
case Some(updateRoot) => astWithForInit.withForUpdateEdge(forNode, updateRoot)
239+
case None => astWithForInit
240+
}
241+
242+
bodyAsts.headOption.flatMap(_.root) match {
243+
case Some(bodyRoot) => astWithForUpdate.withForBodyEdge(forNode, bodyRoot)
244+
case None => astWithForUpdate
245+
}
224246
}
225247

226248
/** For the given try body, catch ASTs and finally AST, create a try-catch-finally AST with orders set correctly for
@@ -248,10 +270,22 @@ abstract class AstCreatorBase[Node, NodeProcessor](filename: String)(implicit wi
248270
*/
249271
def tryCatchAst(tryNode: NewControlStructure, tryBodyAst: Ast, catchAsts: Seq[Ast], finallyAst: Option[Ast]): Ast = {
250272
// setArgumentIndices receives the try body first, then the catch ASTs, then the optional finally AST.
setArgumentIndices(tryBodyAst +: (catchAsts ++ finallyAst.toSeq))
251-
Ast(tryNode)
273+
val astWithChildren = Ast(tryNode)
252274
.withChild(tryBodyAst)
253275
.withChildren(catchAsts)
254276
.withChildren(finallyAst.toSeq)
277+
278+
// In addition to the child links above, attach a TRY_BODY edge from tryNode to the root of the try body.
// Skipped when the try body AST has no root.
val astWithTryBody = tryBodyAst.root match {
279+
case Some(tryBodyRoot) => astWithChildren.withTryBodyEdge(tryNode, tryBodyRoot)
280+
case None => astWithChildren
281+
}
282+
283+
// One CATCH_BODY edge per catch AST that has a root; catch ASTs without a root are dropped by flatMap.
val astWithCatchBodies = astWithTryBody.withCatchBodyEdges(tryNode, catchAsts.flatMap(_.root).toList)
284+
285+
// FINALLY_BODY edge only when a finally AST was supplied and it has a root.
finallyAst.flatMap(_.root) match {
286+
case Some(finallyRoot) => astWithCatchBodies.withFinallyBodyEdge(tryNode, finallyRoot)
287+
case None => astWithCatchBodies
288+
}
255289
}
256290

257291
/** For a given block node and statement ASTs, create an AST that represents the block. The main purpose of this

joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/passes/controlflow/cfgcreation/CfgCreator.scala

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,11 @@ class CfgCreator(entryNode: Method, diffGraph: DiffGraphBuilder) {
185185
}
186186

187187
/** Builds the CFG for a throw control structure.
 *
 * The CFG of the thrown expression (empty if none is found) is concatenated with a CFG whose entry node is the
 * throw node itself, and the edges produced by `singleEdge(node, exitNode)` are added to the result.
 */
protected def cfgForThrowStatement(node: ControlStructure): Cfg = {
188-
val throwExprCfg = node.astChildren.find(_.order == 1).map(cfgFor).getOrElse(Cfg.empty)
188+
// Edge-first lookup with order fallback (per the commit description): use the node reached via an
// outgoing ARGUMENT edge when one exists, otherwise the AST child with order 1.
// NOTE(review): relies on `coalesce` yielding the first non-empty traversal - confirm against the traversal API.
val throwExprCfg = Iterator(node)
189+
.coalesce(_._argumentOut.cast[AstNode], _.astChildren.order(1))
190+
.headOption
191+
.map(cfgFor)
192+
.getOrElse(Cfg.empty)
189193
val concatedNatedCfg = throwExprCfg ++ Cfg(entryNode = Option(node))
190194
concatedNatedCfg.copy(edges = concatedNatedCfg.edges ++ singleEdge(node, exitNode))
191195
}
@@ -373,12 +377,24 @@ class CfgCreator(entryNode: Method, diffGraph: DiffGraphBuilder) {
373377
* for the loop and a fringe.
374378
*/
375379
protected def cfgForForStatement(node: ControlStructure): Cfg = {
376-
val children = node.astChildren.l
377-
val nLocals = children.count(_.isLocal)
378-
val initExprCfg = children.find(_.order == nLocals + 1).map(cfgFor).getOrElse(Cfg.empty)
380+
val children = node.astChildren.l
381+
val nLocals = children.count(_.isLocal)
382+
val initExprCfg = Iterator(node)
383+
.coalesce(_._forInitOut.cast[AstNode], _.astChildren.order(nLocals + 1))
384+
.headOption
385+
.map(cfgFor)
386+
.getOrElse(Cfg.empty)
379387
val conditionCfg = children.find(_.order == nLocals + 2).map(cfgFor).getOrElse(Cfg.empty)
380-
val loopExprCfg = children.find(_.order == nLocals + 3).map(cfgFor).getOrElse(Cfg.empty)
381-
val bodyCfg = children.find(_.order == nLocals + 4).map(cfgFor).getOrElse(Cfg.empty)
388+
val loopExprCfg = Iterator(node)
389+
.coalesce(_._forUpdateOut.cast[AstNode], _.astChildren.order(nLocals + 3))
390+
.headOption
391+
.map(cfgFor)
392+
.getOrElse(Cfg.empty)
393+
val bodyCfg = Iterator(node)
394+
.coalesce(_._forBodyOut.cast[AstNode], _.astChildren.order(nLocals + 4))
395+
.headOption
396+
.map(cfgFor)
397+
.getOrElse(Cfg.empty)
382398

383399
val innerCfg = bodyCfg ++ loopExprCfg
384400
val loopEntryNode = conditionCfg.entryNode.orElse(innerCfg.entryNode)
@@ -409,7 +425,11 @@ class CfgCreator(entryNode: Method, diffGraph: DiffGraphBuilder) {
409425
* inner CFG as bodyCfg ++ conditionCfg and then connect edges according to the semantics of do-while.
410426
*/
411427
protected def cfgForDoStatement(node: ControlStructure): Cfg = {
412-
val bodyCfg = node.astChildren.where(_.order(1)).headOption.map(cfgFor).getOrElse(Cfg.empty)
428+
val bodyCfg = Iterator(node)
429+
.coalesce(_._doBodyOut.cast[AstNode], _.astChildren.order(1))
430+
.headOption
431+
.map(cfgFor)
432+
.getOrElse(Cfg.empty)
413433
val conditionCfg = node.condition.headOption.map(cfgFor).getOrElse(Cfg.empty)
414434
val innerCfg = bodyCfg ++ conditionCfg
415435

@@ -516,41 +536,42 @@ class CfgCreator(entryNode: Method, diffGraph: DiffGraphBuilder) {
516536
* field of the `Block` node must be set to `finally`.
517537
*/
518538
protected def cfgForTryStatement(node: ControlStructure): Cfg = {
519-
val maybeTryBlock =
520-
node.astChildren
521-
.order(1)
522-
.where(_.astChildren) // Filter out empty `try` bodies
523-
.headOption
539+
val maybeTryBlock = Iterator(node)
540+
.coalesce(
541+
_._tryBodyOut.cast[AstNode].filter(_.astChildren.nonEmpty),
542+
_.astChildren.order(1).where(_.astChildren) // Filter out empty `try` bodies
543+
)
544+
.headOption
524545

525546
val tryBodyCfg: Cfg = maybeTryBlock.map(cfgFor).getOrElse(Cfg.empty)
526547

527548
val catchControlStructures =
528549
(node.astChildren.isControlStructure.isCatch ++ node.astChildren.isControlStructure.isElse).toList
529-
val catchBodyCfgs = if (catchControlStructures.isEmpty) {
530-
node.astChildren.order(2).toList match {
531-
case Nil => List(Cfg.empty)
532-
case asts => asts.map(cfgFor)
533-
}
534-
} else {
535-
catchControlStructures match {
536-
case Nil => List(Cfg.empty)
537-
case asts => asts.map(cfgFor)
538-
}
550+
val catchBodyFallback =
551+
if (catchControlStructures.isEmpty) node.astChildren.order(2)
552+
else catchControlStructures.iterator
553+
554+
val catchBodyCfgs = Iterator(node)
555+
.coalesce(_._catchBodyOut.cast[AstNode], _ => catchBodyFallback)
556+
.map(cfgFor)
557+
.toList match {
558+
case Nil => List(Cfg.empty)
559+
case asts => asts
539560
}
540561

541562
val finallyControlStructures = node.astChildren.isControlStructure.isFinally.toList
542-
val maybeFinallyBodyCfg = if (catchControlStructures.isEmpty && finallyControlStructures.isEmpty) {
543-
node.astChildren
544-
.order(3)
545-
.map(cfgFor)
546-
.headOption // Assume there can only be one
547-
.toList
548-
} else {
549-
finallyControlStructures
550-
.map(cfgFor)
551-
.headOption // Assume there can only be one
552-
.toList
553-
}
563+
val finallyBodyFallback =
564+
if (catchControlStructures.isEmpty && finallyControlStructures.isEmpty) {
565+
node.astChildren.order(3)
566+
} else {
567+
finallyControlStructures.iterator
568+
}
569+
570+
val maybeFinallyBodyCfg = Iterator(node)
571+
.coalesce(_._finallyBodyOut.cast[AstNode], _ => finallyBodyFallback)
572+
.map(cfgFor)
573+
.headOption // Assume there can only be one
574+
.toList
554575

555576
val tryToCatchEdges = catchBodyCfgs.flatMap { catchBodyCfg =>
556577
edgesFromFringeTo(tryBodyCfg, catchBodyCfg.entryNode)

0 commit comments

Comments
 (0)