Skip to content

Commit 1d91b95

Browse files
authored
[Dataflow] Big improve for linked fns resolve (#2278)
* dep: fix sec-vuln * feat: return track for fdef resolve also perf+ for `addEdge` * test: improve explicit tests for call retrieval
1 parent 1086ac9 commit 1d91b95

File tree

14 files changed

+160
-119
lines changed

14 files changed

+160
-119
lines changed

package-lock.json

Lines changed: 21 additions & 58 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/dataflow/graph/graph.ts

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -326,15 +326,7 @@ export class DataflowGraph<
326326
return this;
327327
}
328328

329-
/** {@inheritDoc} */
330-
public addEdge(from: NodeId, to: NodeId, type: EdgeType | number): this
331-
/** {@inheritDoc} */
332-
public addEdge(from: { nodeId: NodeId }, to: { nodeId: NodeId }, type: EdgeType | number): this
333-
/** {@inheritDoc} */
334-
public addEdge(from: NodeId | { nodeId: NodeId }, to: NodeId | { nodeId: NodeId }, type: EdgeType | number): this
335-
public addEdge(from: NodeId | { nodeId: NodeId }, to: NodeId | { nodeId: NodeId }, type: EdgeType | number): this {
336-
const [fromId, toId] = extractEdgeIds(from, to);
337-
329+
public addEdge(fromId: NodeId, toId: NodeId, type: EdgeType | number): this {
338330
if(fromId === toId) {
339331
return this;
340332
}
@@ -512,15 +504,6 @@ function mergeNodeInfos<Vertex extends DataflowGraphVertexInfo>(current: Vertex,
512504
return current;
513505
}
514506

515-
/**
516-
* Returns the ids of the dataflow vertices referenced.
517-
*/
518-
function extractEdgeIds(from: NodeId | { nodeId: NodeId }, to: NodeId | { nodeId: NodeId }): [fromId: NodeId, toId: NodeId] {
519-
const fromId = typeof from === 'object' ? from.nodeId : from;
520-
const toId = typeof to === 'object' ? to.nodeId : to;
521-
return [fromId, toId];
522-
}
523-
524507
export interface IEnvironmentJson {
525508
readonly id: number;
526509
parent: IEnvironmentJson;

src/dataflow/internal/linker.ts

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -254,10 +254,10 @@ export function linkFunctionCallWithSingleTarget(
254254
if(defs === undefined) {
255255
continue;
256256
}
257-
for(const def of defs) {
258-
if(!isBuiltIn(def.nodeId)) {
259-
graph.addEdge(ingoing, def, EdgeType.DefinedByOnCall);
260-
graph.addEdge(id, def, EdgeType.DefinesOnCall);
257+
for(const { nodeId } of defs) {
258+
if(!isBuiltIn(nodeId)) {
259+
graph.addEdge(ingoing.nodeId, nodeId, EdgeType.DefinedByOnCall);
260+
graph.addEdge(id, nodeId, EdgeType.DefinesOnCall);
261261
}
262262
}
263263
}
@@ -319,6 +319,7 @@ function linkFunctionCall(
319319
}
320320

321321
const [functionDefs] = getAllLinkedFunctionDefinitions(new Set(functionDefinitionReadIds), graph);
322+
322323
const propagateExitPoints: ExitPoint[] = [];
323324
for(const def of functionDefs.values()) {
324325
// we can skip this if we already linked it
@@ -395,6 +396,18 @@ const LinkedFnFollowBits = EdgeType.Reads | EdgeType.DefinedBy | EdgeType.Define
395396

396397
/**
397398
* Finds all linked function definitions starting from the given set of read ids.
399+
* This is a complicated function, please only call it if you know what you are doing.
400+
* For example, if you are interested in the called functions of a function call, use {@link getAllFunctionCallTargets} instead.
401+
* This function here expects you to handle the accessed objects yourself (e.g,. already resolve the first layer of reads/returns/calls/... or resolve the identifier by name)
402+
* and then pass in the relevant read ids.
403+
* @example
404+
* Consider a scenario like this:
405+
* ```R
406+
* x <- function() 3
407+
* x()
408+
* ```
409+
* To resolve the call `x` in the second line, use {@link getAllFunctionCallTargets}!
410+
* To know what fdefs the definition of `x` in the first line links to, you can use {@link getAllLinkedFunctionDefinitions|this function}.
398411
*/
399412
export function getAllLinkedFunctionDefinitions(
400413
functionDefinitionReadIds: ReadonlySet<NodeId>,
@@ -411,15 +424,16 @@ export function getAllLinkedFunctionDefinitions(
411424
const visited = new Set<NodeId>();
412425

413426
while(potential.length !== 0) {
414-
const currentId = potential.pop() as NodeId;
415-
visited.add(currentId);
427+
const cid = potential.pop() as NodeId;
428+
// console.log(`visiting ${recoverName(cid, dataflowGraph.idMap)} ${cid}`);
429+
visited.add(cid);
416430

417-
if(isBuiltIn(currentId)) {
418-
builtIns.add(currentId);
431+
if(isBuiltIn(cid)) {
432+
builtIns.add(cid);
419433
continue;
420434
}
421435

422-
const currentInfo = dataflowGraph.get(currentId, true);
436+
const currentInfo = dataflowGraph.get(cid, true);
423437
if(currentInfo === undefined) {
424438
continue;
425439
}
@@ -442,7 +456,7 @@ export function getAllLinkedFunctionDefinitions(
442456
}
443457
}
444458

445-
if(hasReturnEdge) {
459+
if(vertex.tag === VertexType.FunctionCall || hasReturnEdge) {
446460
continue;
447461
}
448462

@@ -478,7 +492,7 @@ export function linkInputs(referencesToLinkAgainstEnvironment: readonly Identifi
478492
let allBuiltIn = true;
479493
for(const target of probableTarget) {
480494
// we can stick with maybe even if readId.attribute is always
481-
graph.addEdge(bodyInput, target, EdgeType.Reads);
495+
graph.addEdge(bodyInput.nodeId, target.nodeId, EdgeType.Reads);
482496
if(!isReferenceType(target.type, ReferenceType.BuiltInConstant | ReferenceType.BuiltInFunction)) {
483497
allBuiltIn = false;
484498
}

src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,11 @@ export function processApply<OtherInfo>(
176176
}
177177
expensiveTrace(dataflowLogger, () => `Found ${resolved.length} references to open ref ${ingoing.nodeId} in closure of function definition ${rootId}`);
178178
let allBuiltIn = true;
179-
for(const ref of resolved) {
180-
information.graph.addEdge(ingoing, ref, EdgeType.Reads);
181-
information.graph.addEdge(rootId, ref, EdgeType.Reads); // because the def. is the anonymous call
182-
if(!isReferenceType(ref.type, ReferenceType.BuiltInConstant | ReferenceType.BuiltInFunction)) {
179+
const inId = ingoing.nodeId;
180+
for(const { nodeId, type } of resolved) {
181+
information.graph.addEdge(inId, nodeId, EdgeType.Reads);
182+
information.graph.addEdge(rootId, nodeId, EdgeType.Reads); // because the def. is the anonymous call
183+
if(!isReferenceType(type, ReferenceType.BuiltInConstant | ReferenceType.BuiltInFunction)) {
183184
allBuiltIn = false;
184185
}
185186
}

src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,12 +434,13 @@ export function markAsAssignment<OtherInfo>(
434434

435435
information.environment = define(nodeToDefine, assignmentConfig?.superAssignment, information.environment, data.ctx.config);
436436
information.graph.setDefinitionOfVertex(nodeToDefine);
437+
const nid = nodeToDefine.nodeId;
437438
if(!assignmentConfig?.quoteSource) {
438439
for(const sourceId of sourceIds) {
439-
information.graph.addEdge(nodeToDefine, sourceId, EdgeType.DefinedBy);
440+
information.graph.addEdge(nid, sourceId, EdgeType.DefinedBy);
440441
}
441442
}
442-
information.graph.addEdge(nodeToDefine, rootIdOfAssignment, EdgeType.DefinedBy);
443+
information.graph.addEdge(nid, rootIdOfAssignment, EdgeType.DefinedBy);
443444
// kinda dirty, but we have to remove existing read edges for the symbol, added by the child
444445
const out = information.graph.outgoingEdges(nodeToDefine.nodeId);
445446
for(const [id, edge] of (out ?? [])) {

src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,13 @@ function linkReadNameToWriteIfPossible(read: IdentifierReference, environments:
5353
return;
5454
}
5555

56+
const rid = read.nodeId;
5657
for(const target of probableTarget) {
57-
// we can stick with maybe even if readId.attribute is always
58-
nextGraph.addEdge(read, target, EdgeType.Reads);
58+
const tid = target.nodeId;
5959
if((read.type === ReferenceType.Function || read.type === ReferenceType.BuiltInFunction) && (isBuiltIn(target.definedAt))) {
60-
nextGraph.addEdge(read, target, EdgeType.Calls);
60+
nextGraph.addEdge(rid, tid, EdgeType.Reads | EdgeType.Calls);
61+
} else {
62+
nextGraph.addEdge(rid, tid, EdgeType.Reads);
6163
}
6264
}
6365
}

src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import { type DataflowProcessorInformation, processDataflowFor } from '../../../../../processor';
2-
import { type DataflowInformation, ExitPointType, overwriteExitPoints } from '../../../../../info';
2+
import {
3+
type DataflowInformation,
4+
ExitPointType,
5+
overwriteExitPoints
6+
} from '../../../../../info';
37
import {
48
getAllFunctionCallTargets,
59
linkArgumentsOnCall,
@@ -246,10 +250,11 @@ function updateNestedFunctionClosures(
246250
remainingIn.push(ingoing);
247251
continue;
248252
}
253+
const inId = ingoing.nodeId;
249254
expensiveTrace(dataflowLogger, () => `Found ${resolved.length} references to open ref ${id} in closure of function definition ${fnId}`);
250255
let allBuiltIn = true;
251256
for(const ref of resolved) {
252-
graph.addEdge(ingoing, ref, EdgeType.Reads);
257+
graph.addEdge(inId, ref.nodeId, EdgeType.Reads);
253258
if(!isReferenceType(ref.type, ReferenceType.BuiltInConstant | ReferenceType.BuiltInFunction)) {
254259
allBuiltIn = false;
255260
}
@@ -307,6 +312,9 @@ export function updateNestedFunctionCalls(
307312
continue;
308313
}
309314
graph.addEdge(id, target, EdgeType.Calls);
315+
for(const exitPoint of targetVertex.exitPoints) {
316+
graph.addEdge(id, exitPoint.nodeId, EdgeType.Returns);
317+
}
310318
const ingoingRefs = targetVertex.subflow.in;
311319
const remainingIn: IdentifierReference[] = [];
312320
for(const ingoing of ingoingRefs) {
@@ -315,11 +323,12 @@ export function updateNestedFunctionCalls(
315323
remainingIn.push(ingoing);
316324
continue;
317325
}
326+
const inId = ingoing.nodeId;
318327
expensiveTrace(dataflowLogger, () => `Found ${resolved.length} references to open ref ${id} in closure of function definition ${id}`);
319-
for(const def of resolved) {
320-
if(!isBuiltIn(def.nodeId)) {
321-
graph.addEdge(ingoing, def, EdgeType.DefinedByOnCall);
322-
graph.addEdge(id, def, EdgeType.DefinesOnCall);
328+
for(const { nodeId } of resolved) {
329+
if(!isBuiltIn(nodeId)) {
330+
graph.addEdge(inId, nodeId, EdgeType.DefinedByOnCall);
331+
graph.addEdge(id, nodeId, EdgeType.DefinesOnCall);
323332
}
324333
}
325334
}
@@ -355,9 +364,10 @@ function findPromiseLinkagesForParameters(parameters: DataflowGraph, readInParam
355364
const remainingRead: IdentifierReference[] = [];
356365
for(const read of readInParameters) {
357366
const resolved = read.name ? resolveByName(read.name, parameterEnvs, read.type) : undefined;
367+
const rid = read.nodeId;
358368
if(resolved !== undefined) {
359-
for(const ref of resolved) {
360-
parameters.addEdge(read, ref, EdgeType.Reads);
369+
for(const { nodeId } of resolved) {
370+
parameters.addEdge(rid, nodeId, EdgeType.Reads);
361371
}
362372
continue;
363373
}
@@ -370,11 +380,11 @@ function findPromiseLinkagesForParameters(parameters: DataflowGraph, readInParam
370380
continue;
371381
}
372382
if(writingOuts[0].cds === undefined) {
373-
parameters.addEdge(read, writingOuts[0], EdgeType.Reads);
383+
parameters.addEdge(rid, writingOuts[0].nodeId, EdgeType.Reads);
374384
continue;
375385
}
376-
for(const out of writingOuts) {
377-
parameters.addEdge(read, out, EdgeType.Reads);
386+
for(const { nodeId } of writingOuts) {
387+
parameters.addEdge(rid, nodeId, EdgeType.Reads);
378388
}
379389
}
380390
return remainingRead;

src/dataflow/internal/process/functions/call/built-in/built-in-get.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ export function processGet<OtherInfo>(
5353
const firstArg = processedArguments[0];
5454
if(firstArg) {
5555
// get 'reads' its first argument
56-
information.graph.addEdge(rootId, firstArg.entryPoint, EdgeType.Reads);
56+
information.graph.addEdge(rootId, firstArg.entryPoint, EdgeType.Returns | EdgeType.Reads);
5757
}
5858

5959
return information;

src/dataflow/internal/process/functions/call/built-in/built-in-try-catch.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,9 @@ function promoteCallToFunction<OtherInfo>(call: NodeId, arg: NodeId, info: Dataf
155155
expensiveTrace(dataflowLogger, () => `Found ${resolved.length} references to open ref ${ingoing.nodeId} in closure of function definition ${call}`);
156156
let allBuiltIn = true;
157157
for(const ref of resolved) {
158-
info.graph.addEdge(ingoing, ref, EdgeType.Reads);
159-
info.graph.addEdge(call, ref, EdgeType.Reads); // because the def. is the anonymous call
158+
const rid = ref.nodeId;
159+
info.graph.addEdge(ingoing.nodeId, rid, EdgeType.Reads);
160+
info.graph.addEdge(call, rid, EdgeType.Reads); // because the def. is the anonymous call
160161
if(!isReferenceType(ref.type, ReferenceType.BuiltInConstant | ReferenceType.BuiltInFunction)) {
161162
allBuiltIn = false;
162163
}

src/dataflow/internal/process/functions/process-argument.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ export function linkReadsForArgument<OtherInfo>(root: RNode<OtherInfo & ParentIn
1717
const allIdsBeforeArguments = new Set(collectAllIds(root, n => n.type === RType.Argument && n.info.id !== root.info.id));
1818
const ingoingBeforeArgs = ingoingRefs.filter(r => allIdsBeforeArguments.has(r.nodeId));
1919

20+
const rid = root.info.id;
2021
for(const ref of ingoingBeforeArgs) {
2122
// link against the root reference currently I do not know how to deal with nested function calls otherwise
22-
graph.addEdge(root.info.id, ref, EdgeType.Reads);
23+
graph.addEdge(rid, ref.nodeId, EdgeType.Reads);
2324
}
2425
}
2526

0 commit comments

Comments
 (0)