Skip to content

Commit c7b7c22

Browse files
committed
perf: big perf improvement for call link res (closes #2128)
1 parent 8d1d618 commit c7b7c22

File tree

9 files changed

+99
-75
lines changed

9 files changed

+99
-75
lines changed

src/dataflow/environments/built-in-config.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export interface BuiltInConstantDefinition<Value> extends BaseBuiltInDefinition
2828
export interface BuiltInFunctionDefinition<BuiltInProcessor extends BuiltInMappingName> extends BaseBuiltInDefinition {
2929
readonly type: 'function';
3030
readonly processor: BuiltInProcessor;
31-
readonly config?: ConfigOfBuiltInMappingName<BuiltInProcessor>;
31+
readonly config?: ConfigOfBuiltInMappingName<BuiltInProcessor> & { libFn?: boolean };
3232
readonly evalHandler?: string
3333
}
3434

@@ -39,15 +39,14 @@ export interface BuiltInFunctionDefinition<BuiltInProcessor extends BuiltInMappi
3939
export interface BuiltInReplacementDefinition extends BaseBuiltInDefinition {
4040
readonly type: 'replacement';
4141
readonly suffixes: ('<<-' | '<-')[];
42-
readonly config: { readIndices: boolean }
42+
readonly config: { readIndices: boolean }
4343
}
4444

45-
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
46-
export type BuiltInDefinition = BuiltInConstantDefinition<any> | BuiltInFunctionDefinition<any> | BuiltInReplacementDefinition;
45+
export type BuiltInDefinition<T extends BuiltInMappingName = BuiltInMappingName> = BuiltInConstantDefinition<unknown> | BuiltInFunctionDefinition<T> | BuiltInReplacementDefinition;
4746
/**
4847
* @see DefaultBuiltinConfig
4948
*/
50-
export type BuiltInDefinitions = BuiltInDefinition[];
49+
export type BuiltInDefinitions<Keys extends BuiltInMappingName[] = BuiltInMappingName[]> = [...{ [ K in keyof Keys]: BuiltInDefinition<Keys[K]> }]
5150

5251

5352
/**
@@ -56,7 +55,7 @@ export type BuiltInDefinitions = BuiltInDefinition[];
5655
export function getDefaultBuiltInDefinitions(): BuiltIns {
5756
const builtIns = new BuiltIns();
5857
for(const definition of DefaultBuiltinConfig) {
59-
builtIns.registerBuiltInDefinition(definition);
58+
builtIns.registerBuiltInDefinition(definition as BuiltInDefinition);
6059
}
6160
return builtIns;
6261
}
@@ -66,7 +65,7 @@ export function getDefaultBuiltInDefinitions(): BuiltIns {
6665
* @param definitions - the list of built-in definitions
6766
* @param loadDefaults - whether to first add the {@link DefaultBuiltinConfig} before the given {@link definitions}
6867
*/
69-
export function getBuiltInDefinitions(definitions: BuiltInDefinitions, loadDefaults: boolean | undefined): BuiltIns {
68+
export function getBuiltInDefinitions<Keys extends BuiltInMappingName[]>(definitions: BuiltInDefinitions<Keys>, loadDefaults: boolean | undefined): BuiltIns {
7069
let builtIns = new BuiltIns();
7170

7271
if(loadDefaults) {

src/dataflow/environments/built-in.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ export interface BuiltInIdentifierDefinition extends IdentifierReference {
8181
type: ReferenceType.BuiltInFunction
8282
definedAt: BuiltIn
8383
processor: BuiltInIdentifierProcessor
84-
config?: object
84+
config?: ConfigOfBuiltInMappingName<BuiltInMappingName> & { libFn?: boolean }
8585
}
8686

8787
export interface BuiltInIdentifierConstant<T = unknown> extends IdentifierReference {

src/dataflow/environments/default-builtin-config.ts

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ function toRegex(n: readonly string[]): RegExp {
9999
/**
100100
* Contains the built-in definitions recognized by flowR
101101
*/
102-
export const DefaultBuiltinConfig: BuiltInDefinitions = [
102+
export const DefaultBuiltinConfig = [
103103
{ type: 'constant', names: ['NULL', 'NA', 'NaN', 'NA_integer_', 'NA_real_', 'NA_complex_', 'NA_character_'], value: null, assumePrimitive: true },
104104
{ type: 'constant', names: ['TRUE', 'T'], value: true, assumePrimitive: true },
105105
{ type: 'constant', names: ['FALSE', 'F'], value: false, assumePrimitive: true },
@@ -186,6 +186,7 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
186186
names: GgPlotAddons,
187187
processor: 'builtin:default',
188188
config: {
189+
libFn: true,
189190
forceArgs: 'all',
190191
hasUnknownSideEffects: {
191192
type: 'link-to-last-call',
@@ -197,6 +198,7 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
197198
names: TinyPlotAddons,
198199
processor: 'builtin:default',
199200
config: {
201+
libFn: true,
200202
forceArgs: 'all',
201203
hasUnknownSideEffects: {
202204
type: 'link-to-last-call',
@@ -208,6 +210,7 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
208210
names: ['image_write', 'image_capture', 'dev.capture', 'dev.off'],
209211
processor: 'builtin:default',
210212
config: {
213+
libFn: true,
211214
forceArgs: 'all',
212215
hasUnknownSideEffects: {
213216
type: 'link-to-last-call',
@@ -217,14 +220,17 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
217220
{ type: 'function', names: ['('], processor: 'builtin:default', config: { returnsNthArgument: 0 }, assumePrimitive: true },
218221
{ type: 'function', names: ['load', 'load_all', 'setwd', 'set.seed'], processor: 'builtin:default', config: { hasUnknownSideEffects: true, forceArgs: [true] }, assumePrimitive: false },
219222
{ type: 'function', names: ['body', 'formals', 'environment'], processor: 'builtin:default', config: { hasUnknownSideEffects: true, forceArgs: [true] }, assumePrimitive: true },
220-
{ type: 'function', names: ['.Call', '.External', '.C', '.Fortran'], processor: 'builtin:default', config: { hasUnknownSideEffects: true, forceArgs: [true],
221-
treatAsFnCall: {
222-
'.Call': ['.NAME'],
223-
'.External': ['.NAME'],
224-
'.C': ['.NAME'],
225-
'.Fortran': ['.NAME']
226-
}
227-
}, assumePrimitive: true },
223+
{ type: 'function',
224+
names: ['.Call', '.External', '.C', '.Fortran'],
225+
processor: 'builtin:default',
226+
config: { hasUnknownSideEffects: true, forceArgs: [true],
227+
treatAsFnCall: {
228+
'.Call': ['.NAME'],
229+
'.External': ['.NAME'],
230+
'.C': ['.NAME'],
231+
'.Fortran': ['.NAME']
232+
}
233+
}, assumePrimitive: true },
228234
{ type: 'function', names: ['eval'], processor: 'builtin:eval', config: { includeFunctionCall: true }, assumePrimitive: true },
229235
{ type: 'function', names: ['cat'], processor: 'builtin:default', config: { forceArgs: 'all', hasUnknownSideEffects: { type: 'link-to-last-call', callName: /^sink$/ } }, assumePrimitive: false },
230236
{ type: 'function', names: ['switch'], processor: 'builtin:default', config: { forceArgs: [true] }, assumePrimitive: false },
@@ -257,8 +263,8 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
257263
{ type: 'function', names: ['while'], processor: 'builtin:while-loop', config: {}, assumePrimitive: true },
258264
{ type: 'function', names: ['do.call'], processor: 'builtin:apply', config: { indexOfFunction: 0, unquoteFunction: true }, assumePrimitive: true },
259265
{ type: 'function', names: ['.Primitive', '.Internal'], processor: 'builtin:apply', config: { indexOfFunction: 0, unquoteFunction: true, resolveInEnvironment: 'global' }, assumePrimitive: true },
260-
{ type: 'function', names: ['interference'], processor: 'builtin:apply', config: { unquoteFunction: true, nameOfFunctionArgument: 'propensity_integrand' }, assumePrimitive: false },
261-
{ type: 'function', names: ['ddply'], processor: 'builtin:apply', config: { unquoteFunction: true, indexOfFunction: 2, nameOfFunctionArgument: '.fun' }, assumePrimitive: false },
266+
{ type: 'function', names: ['interference'], processor: 'builtin:apply', config: { unquoteFunction: true, nameOfFunctionArgument: 'propensity_integrand', libFn: true }, assumePrimitive: false },
267+
{ type: 'function', names: ['ddply'], processor: 'builtin:apply', config: { unquoteFunction: true, indexOfFunction: 2, nameOfFunctionArgument: '.fun', libFn: true }, assumePrimitive: false },
262268
{ type: 'function', names: ['list'], processor: 'builtin:list', config: {}, assumePrimitive: true },
263269
{ type: 'function', names: ['c'], processor: 'builtin:vector', config: {}, assumePrimitive: true, evalHandler: 'builtin:c' },
264270
{
@@ -275,11 +281,9 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
275281
{
276282
type: 'function',
277283
names: [
278-
'on.exit', 'sys.on.exit', 'par', 'tpar', 'sink', 'tinytheme', 'theme_set',
284+
'on.exit', 'sys.on.exit', 'par', 'tpar', 'sink',
279285
/* library and require is handled above */
280286
'requireNamespace', 'loadNamespace', 'attachNamespace', 'asNamespace',
281-
/* downloader and installer functions (R, devtools, BiocManager) */
282-
'library.dynam', 'install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages',
283287
/* weird env attachments */
284288
'attach', 'unname', 'data',
285289
/* file creation/removal */
@@ -289,6 +293,17 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
289293
config: { hasUnknownSideEffects: true },
290294
assumePrimitive: false
291295
},
296+
{
297+
type: 'function',
298+
names: [
299+
'tinytheme', 'theme_set',
300+
/* downloader and installer functions (R, devtools, BiocManager) */
301+
'library.dynam', 'install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages',
302+
],
303+
processor: 'builtin:default',
304+
config: { hasUnknownSideEffects: true, libFn: true },
305+
assumePrimitive: false
306+
},
292307
/* they are all mapped to `<-` but we separate super assignments */
293308
{
294309
type: 'replacement',
@@ -306,17 +321,17 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
306321
readIndices: false
307322
}
308323
}
309-
];
324+
] as const satisfies BuiltInDefinitions;
310325

311326

312327
/**
313-
*
328+
* Expensive and naive lookup of the default processor for a built-in function name
314329
*/
315330
export function getDefaultProcessor(name: string): BuiltInMappingName | 'unnamed' | undefined {
316331
if(name.startsWith(UnnamedFunctionCallPrefix)) {
317332
return 'unnamed';
318333
}
319-
const fn = DefaultBuiltinConfig.find(def => (def.names.includes(name) && def.type !== 'constant')
334+
const fn = DefaultBuiltinConfig.find(def => ((def.names as string[]).includes(name) && def.type !== 'constant')
320335
|| (def.type === 'replacement' && def.suffixes.flatMap(d => def.names.map(n => `${n}${d}`)).includes(name))
321336
) as BuiltInFunctionDefinition<'builtin:default'> | BuiltInReplacementDefinition | undefined;
322337
return fn?.type === 'replacement' ? 'builtin:replacement' : fn?.processor as BuiltInMappingName;

src/dataflow/environments/environment.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,9 @@ export class Environment implements IEnvironment {
155155
// we need to make a copy to avoid side effects for old reference in other environments
156156
const updatedOld: IdentifierDefinition[] = old?.slice() ?? [];
157157
for(const v of values) {
158-
const index = updatedOld.findIndex(o => o.nodeId === v.nodeId && o.definedAt === v.definedAt);
159-
if(index >= 0) {
158+
const { nodeId, definedAt } = v;
159+
const index = updatedOld.find(o => o.nodeId === nodeId && o.definedAt === definedAt);
160+
if(index) {
160161
continue;
161162
}
162163
if(applyCds === undefined) {

src/dataflow/graph/dataflowgraph-builder.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ export class DataflowGraphBuilder<
413413
*
414414
*/
415415
export function getBuiltInSideEffect(name: string): LinkTo<RegExp> | undefined {
416-
const got = DefaultBuiltinConfig.find(e => e.names.includes(name));
416+
const got = DefaultBuiltinConfig.find(e => (e.names as string[]).includes(name));
417417
if(got?.type !== 'function') {
418418
return undefined;
419419
}

src/dataflow/internal/linker.ts

Lines changed: 54 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { DefaultMap } from '../../util/collections/defaultmap';
2-
import { guard, isNotUndefined } from '../../util/assert';
2+
import { isNotUndefined } from '../../util/assert';
33
import { expensiveTrace, log } from '../../util/log';
44
import { type NodeId, recoverName } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
55
import { type IdentifierReference, isReferenceType, ReferenceType } from '../environments/identifier';
@@ -8,7 +8,7 @@ import type { RParameter } from '../../r-bridge/lang-4.x/ast/model/nodes/r-param
88
import type { AstIdMap, ParentInformation } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
99
import { dataflowLogger } from '../logger';
1010
import { EmptyArgument } from '../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
11-
import { edgeDoesNotIncludeType, edgeIncludesType, EdgeType } from '../graph/edge';
11+
import { edgeIncludesType, EdgeType } from '../graph/edge';
1212
import { RType } from '../../r-bridge/lang-4.x/ast/model/type';
1313
import {
1414
type DataflowGraphVertexFunctionCall,
@@ -193,6 +193,7 @@ export function linkFunctionCallWithSingleTarget(
193193
linkFunctionCallArguments(def.id, idMap, defName, id, info.args, graph);
194194
}
195195

196+
const FCallLinkReadBits = EdgeType.Reads | EdgeType.Calls;
196197
/* there is _a lot_ potential for optimization here */
197198
function linkFunctionCall(
198199
graph: DataflowGraph,
@@ -211,19 +212,19 @@ function linkFunctionCall(
211212
return;
212213
}
213214

214-
const readBits = EdgeType.Reads | EdgeType.Calls;
215-
const functionDefinitionReadIds = [...edges].filter(([_, e]) =>
216-
edgeDoesNotIncludeType(e.types, EdgeType.Argument)
217-
&& edgeIncludesType(e.types, readBits)
218-
).map(([target, _]) => target);
215+
const functionDefinitionReadIds = new Set<NodeId>();
216+
for(const [t, { types }] of edges.entries()) {
217+
if(!isBuiltIn(t) && edgeIncludesType(types, FCallLinkReadBits)) {
218+
functionDefinitionReadIds.add(t);
219+
}
220+
}
219221

220-
const functionDefs = getAllLinkedFunctionDefinitions(new Set(functionDefinitionReadIds), graph)[0];
222+
const [functionDefs] = getAllLinkedFunctionDefinitions(new Set(functionDefinitionReadIds), graph);
221223
for(const def of functionDefs.values()) {
222-
guard(def.tag === VertexType.FunctionDefinition, () => `expected function definition, but got ${def.tag}`);
223224
linkFunctionCallWithSingleTarget(graph, def, info, idMap);
224225
}
225226
if(thisGraph.isRoot(id) && functionDefs.size > 0) {
226-
calledFunctionDefinitions.push({ functionCall: id, called: [...functionDefs.values()] });
227+
calledFunctionDefinitions.push({ functionCall: id, called: functionDefs.values().toArray() });
227228
}
228229
}
229230

@@ -241,7 +242,9 @@ export function linkFunctionCalls(
241242
): { functionCall: NodeId, called: readonly DataflowGraphVertexInfo[] }[] {
242243
const calledFunctionDefinitions: { functionCall: NodeId, called: DataflowGraphVertexInfo[] }[] = [];
243244
for(const [id, info] of thisGraph.verticesOfType(VertexType.FunctionCall)) {
244-
linkFunctionCall(graph, id, info as DataflowGraphVertexFunctionCall, idMap, thisGraph, calledFunctionDefinitions);
245+
if(!info.onlyBuiltin) {
246+
linkFunctionCall(graph, id, info, idMap, thisGraph, calledFunctionDefinitions);
247+
}
245248
}
246249
return calledFunctionDefinitions;
247250
}
@@ -275,28 +278,35 @@ export function getAllFunctionCallTargets(call: NodeId, graph: DataflowGraph, en
275278
for(const target of functionCallTargets) {
276279
found.push(target.id);
277280
}
278-
found = found.concat(...builtInTargets, functionCallDefs);
281+
found = found.concat(Array.from(builtInTargets), functionCallDefs);
279282
}
280283

281284
return found;
282285
}
283286

287+
const LinkedFnFollowBits = EdgeType.Reads | EdgeType.DefinedBy | EdgeType.DefinedByOnCall;
288+
284289
/**
285290
* Finds all linked function definitions starting from the given set of read ids.
286291
*/
287292
export function getAllLinkedFunctionDefinitions(
288293
functionDefinitionReadIds: ReadonlySet<NodeId>,
289294
dataflowGraph: DataflowGraph
290-
): [Set<DataflowGraphVertexInfo>, Set<BuiltIn>] {
291-
let potential: NodeId[] = functionDefinitionReadIds.values().toArray();
292-
const visited = new Set<NodeId>();
293-
const result = new Set<DataflowGraphVertexInfo>();
295+
): [Set<Required<DataflowGraphVertexFunctionDefinition>>, Set<BuiltIn>] {
296+
const result = new Set<Required<DataflowGraphVertexFunctionDefinition>>();
294297
const builtIns = new Set<BuiltIn>();
295298

296-
while(potential.length > 0) {
299+
if(functionDefinitionReadIds.size === 0) {
300+
return [result, builtIns];
301+
}
302+
303+
const potential: NodeId[] = Array.from(functionDefinitionReadIds);
304+
const visited = new Set<NodeId>();
305+
306+
while(potential.length !== 0) {
297307
const currentId = potential.pop() as NodeId;
308+
visited.add(currentId);
298309

299-
// do not traverse builtins further
300310
if(isBuiltIn(currentId)) {
301311
builtIns.add(currentId);
302312
continue;
@@ -306,32 +316,36 @@ export function getAllLinkedFunctionDefinitions(
306316
if(currentInfo === undefined) {
307317
continue;
308318
}
309-
visited.add(currentId);
310319

311-
const outgoingEdges = currentInfo[1].entries().toArray();
320+
const [vertex, edges] = currentInfo;
312321

313-
const returnEdges = outgoingEdges
314-
.filter(([_, e]) => edgeIncludesType(e.types, EdgeType.Returns));
315-
if(returnEdges.length > 0) {
316-
// only traverse return edges and do not follow `calls` etc. as this indicates that we have a function call which returns a result, and not the function calls itself
317-
potential = potential.concat(returnEdges.map(([target]) => target).filter(id => !visited.has(id)));
322+
// Found a function definition
323+
if(vertex.subflow !== undefined) {
324+
result.add(vertex as Required<DataflowGraphVertexFunctionDefinition>);
318325
continue;
319326
}
320327

321-
const followBits = EdgeType.Reads | EdgeType.DefinedBy | EdgeType.DefinedByOnCall;
322-
const followEdges = outgoingEdges.filter(([_, e]) => edgeIncludesType(e.types, followBits));
328+
let hasReturnEdge = false;
329+
for(const [target, { types }] of edges) {
330+
if(edgeIncludesType(types, EdgeType.Returns)) {
331+
hasReturnEdge = true;
332+
if(!visited.has(target)) {
333+
potential.push(target);
334+
}
335+
}
336+
}
323337

324-
if(currentInfo[0].subflow !== undefined) {
325-
result.add(currentInfo[0]);
338+
if(hasReturnEdge) {
339+
continue;
326340
}
327341

328-
// trace all joined reads
329-
potential = potential.concat(
330-
followEdges
331-
.map(([target]) => target)
332-
.filter(id => !visited.has(id))
333-
);
342+
for(const [target, { types }] of edges) {
343+
if(edgeIncludesType(types, LinkedFnFollowBits) && !visited.has(target)) {
344+
potential.push(target);
345+
}
346+
}
334347
}
348+
335349
return [result, builtIns];
336350
}
337351

@@ -393,10 +407,10 @@ export function linkCircularRedefinitionsWithinALoop(graph: DataflowGraph, openI
393407
}
394408

395409
for(const [name, targets] of openIns.entries()) {
396-
for(const out of lastOutgoing.values()) {
397-
if(out.name === name) {
410+
for(const { name: outName, nodeId } of lastOutgoing.values()) {
411+
if(outName === name) {
398412
for(const target of targets) {
399-
graph.addEdge(target.nodeId, out.nodeId, EdgeType.Reads);
413+
graph.addEdge(target.nodeId, nodeId, EdgeType.Reads);
400414
}
401415
}
402416
}
@@ -412,8 +426,9 @@ export function reapplyLoopExitPoints(exits: readonly ExitPoint[], references: r
412426

413427
for(const ref of references) {
414428
for(const cd of exitCds) {
429+
const { id: cId, when: cWhen } = cd;
415430
if(ref.controlDependencies) {
416-
if(!ref.controlDependencies?.find(c => c.id === cd.id && c.when === cd.when)) {
431+
if(!ref.controlDependencies?.find(c => c.id === cId && c.when === cWhen)) {
417432
ref.controlDependencies.push({ ...cd, byIteration: true });
418433
}
419434
} else {

src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ export interface BuiltInApplyConfiguration extends MergeableRecord {
3030
/** Should we unquote the function if it is given as a string? */
3131
readonly unquoteFunction?: boolean
3232
/** Should the function be resolved in the global environment? */
33-
readonly resolveInEnvironment: 'global' | 'local'
33+
readonly resolveInEnvironment?: 'global' | 'local'
3434
/** Should the value of the function be resolved? */
3535
readonly resolveValue?: boolean
3636
}

0 commit comments

Comments
 (0)