Skip to content

Commit 979f52d

Browse files
authored
[Dataflow] Extractor instrumentation is here! (#2204)
* feat-fix: allow cf to be undefined * refactor: heat countmap eval * refactor: clean up config query update format * feat-fix: add heat config flag to the interface wiki * refactor: repl no longer appends out of fear of state acc
1 parent 5e18a3b commit 979f52d

File tree

7 files changed

+121
-11
lines changed

7 files changed

+121
-11
lines changed

src/cli/repl/core.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import type { FlowrConfigOptions } from '../../config';
2020
import { genericWrapReplFailIfNoRequest, SupportedQueries, type SupportedQuery } from '../../queries/query';
2121
import type { FlowrAnalyzer } from '../../project/flowr-analyzer';
2222
import { startAndEndsWith } from '../../util/text/strings';
23+
import type { RType } from '../../r-bridge/lang-4.x/ast/model/type';
24+
import { instrumentDataflowCount } from '../../dataflow/instrument/instrument-dataflow-count';
2325

2426
let _replCompleterKeywords: string[] | undefined = undefined;
2527
function replCompleterKeywords() {
@@ -128,6 +130,10 @@ export function handleString(code: string) {
128130

129131
async function replProcessStatement(output: ReplOutput, statement: string, analyzer: FlowrAnalyzer, allowRSessionAccess: boolean): Promise<void> {
130132
const time = Date.now();
133+
const heatMap = new Map<RType, number>();
134+
if(analyzer.inspectContext().config.repl.dfProcessorHeat) {
135+
analyzer.context().config.solver.instrument.dataflowExtractors = instrumentDataflowCount(heatMap, map => map.clear());
136+
}
131137
if(statement.startsWith(':')) {
132138
const command = statement.slice(1).split(' ')[0].toLowerCase();
133139
const processor = getCommand(command);
@@ -179,6 +185,24 @@ async function replProcessStatement(output: ReplOutput, statement: string, analy
179185
// do nothing, this is just a nice-to-have
180186
}
181187
}
188+
if(heatMap.size > 0 && analyzer.inspectContext().config.repl.dfProcessorHeat) {
189+
const sorted = Array.from(heatMap.entries()).sort((a, b) => b[1] - a[1]);
190+
console.log(output.formatter.format('[REPL Stats] Dataflow Processor Heatmap:', {
191+
style: FontStyles.Italic,
192+
effect: ColorEffect.Foreground,
193+
color: Colors.White
194+
}));
195+
const longestKey = Math.max(...Array.from(heatMap.keys(), k => k.length));
196+
const longestValue = Math.max(...Array.from(heatMap.values(), v => v.toString().length));
197+
for(const [rType, count] of sorted) {
198+
console.log(output.formatter.format(` - ${(rType + ':').padEnd(longestKey + 1, ' ')} ${count.toString().padStart(longestValue, ' ')}`, {
199+
style: FontStyles.Italic,
200+
effect: ColorEffect.Foreground,
201+
color: Colors.White
202+
}));
203+
}
204+
}
205+
182206
}
183207

184208
/**

src/config.ts

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ import Joi from 'joi';
77
import type { BuiltInDefinitions } from './dataflow/environments/built-in-config';
88
import type { KnownParser } from './r-bridge/parser';
99
import type { DeepWritable } from 'ts-essentials';
10+
import type { DataflowProcessors } from './dataflow/processor';
11+
import type { ParentInformation } from './r-bridge/lang-4.x/ast/model/processing/decorate';
12+
import type { FlowrAnalyzerContext } from './project/context/flowr-analyzer-context';
1013

1114
export enum VariableResolve {
1215
/** Don't resolve constants at all */
@@ -110,7 +113,9 @@ export interface FlowrConfigOptions extends MergeableRecord {
110113
/** Configuration options for the REPL */
111114
readonly repl: {
112115
/** Whether to show quick stats in the REPL after each evaluation */
113-
quickStats: boolean
116+
quickStats: boolean
117+
/** This instruments the dataflow processors to count how often each processor is called */
118+
dfProcessorHeat: boolean;
114119
}
115120
readonly project: {
116121
/** Whether to resolve unknown paths loaded by the r project disk when trying to source/analyze files */
@@ -146,6 +151,15 @@ export interface FlowrConfigOptions extends MergeableRecord {
146151
*/
147152
readonly maxIndexCount: number
148153
},
154+
/** These keys are only intended for use within code, allowing to instrument the dataflow analyzer! */
155+
readonly instrument: {
156+
/**
157+
* Modify the dataflow processors used during dataflow analysis.
158+
* Make sure that all processors required for correct analysis are still present!
159+
* This may have arbitrary consequences on the analysis precision and performance, consider focusing on decorating existing processors instead of replacing them.
160+
*/
161+
dataflowExtractors?: (extractor: DataflowProcessors<ParentInformation>, ctx: FlowrAnalyzerContext) => DataflowProcessors<ParentInformation>
162+
},
149163
/**
150164
* If lax source calls are active, flowR searches for sourced files much more freely,
151165
* based on the configurations you give it.
@@ -238,7 +252,8 @@ export const defaultConfigOptions: FlowrConfigOptions = {
238252
}
239253
},
240254
repl: {
241-
quickStats: false
255+
quickStats: false,
256+
dfProcessorHeat: false
242257
},
243258
project: {
244259
resolveUnknownPathsOnDisk: true
@@ -256,6 +271,9 @@ export const defaultConfigOptions: FlowrConfigOptions = {
256271
searchPath: [],
257272
repeatedSourceLimit: 2
258273
},
274+
instrument: {
275+
dataflowExtractors: undefined
276+
},
259277
slicer: {
260278
threshold: 50
261279
}
@@ -283,7 +301,8 @@ export const flowrConfigFileSchema = Joi.object({
283301
}).optional().description('Semantics regarding how to handle the R environment.')
284302
}).description('Configure language semantics and how flowR handles them.'),
285303
repl: Joi.object({
286-
quickStats: Joi.boolean().optional().description('Whether to show quick stats in the REPL after each evaluation.')
304+
quickStats: Joi.boolean().optional().description('Whether to show quick stats in the REPL after each evaluation.'),
305+
dfProcessorHeat: Joi.boolean().optional().description('This instruments the dataflow processors to count how often each processor is called.')
287306
}).description('Configuration options for the REPL.'),
288307
project: Joi.object({
289308
resolveUnknownPathsOnDisk: Joi.boolean().optional().description('Whether to resolve unknown paths loaded by the r project disk when trying to source/analyze files.')
@@ -310,6 +329,9 @@ export const flowrConfigFileSchema = Joi.object({
310329
maxIndexCount: Joi.number().required().description('The maximum number of indices tracked per object with the pointer analysis.')
311330
})
312331
).description('Whether to track pointers in the dataflow graph, if not, the graph will be over-approximated wrt. containers and accesses.'),
332+
instrument: Joi.object({
333+
dataflowExtractors: Joi.any().optional().description('These keys are only intended for use within code, allowing to instrument the dataflow analyzer!')
334+
}),
313335
resolveSource: Joi.object({
314336
dropPaths: Joi.string().valid(...Object.values(DropPathsOption)).description('Allow to drop the first or all parts of the sourced path, if it is relative.'),
315337
ignoreCapitalization: Joi.boolean().description('Search for filenames matching in the lowercase.'),

src/dataflow/extractor.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ function resolveLinkToSideEffects(ast: NormalizedAst, graph: DataflowGraph) {
9696
continue;
9797
}
9898
/* this has to change whenever we add a new link to relations because we currently offer no abstraction for the type */
99-
const potentials = identifyLinkToLastCallRelation(s.id, cf.graph, graph, s.linkTo, knownCalls);
99+
const potentials = identifyLinkToLastCallRelation(s.id, cf?.graph, graph, s.linkTo, knownCalls);
100100
for(const pot of potentials) {
101101
graph.addEdge(s.id, pot, EdgeType.Reads);
102102
}
@@ -129,7 +129,7 @@ export function produceDataFlowGraph<OtherInfo>(
129129
parser,
130130
completeAst,
131131
environment: ctx.env.makeCleanEnv(),
132-
processors,
132+
processors: ctx.config.solver.instrument.dataflowExtractors?.(processors, ctx) ?? processors,
133133
controlDependencies: undefined,
134134
referenceChain: [files[0].filePath],
135135
ctx
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import type { DataflowProcessors } from '../processor';
2+
import type { ParentInformation } from '../../r-bridge/lang-4.x/ast/model/processing/decorate';
3+
import type { FlowrAnalyzerContext } from '../../project/context/flowr-analyzer-context';
4+
import type { RType } from '../../r-bridge/lang-4.x/ast/model/type';
5+
import type { RNode } from '../../r-bridge/lang-4.x/ast/model/model';
6+
import type { DataflowInformation } from '../info';
7+
8+
/**
 * Builds an instrumentation hook that wraps every given dataflow processor with a call counter.
 *
 * Each time the returned hook is invoked, it first calls `reset` with `countMap` and then
 * produces a fresh processor table with the same keys as the one passed in. Every wrapped
 * processor increments the entry of `countMap` stored under its key and afterwards delegates
 * to the original processor with unchanged arguments, returning its result.
 *
 * @param countMap - Out parameter, receives the number of calls per processor key.
 * @param reset    - Called with `countMap` whenever the returned hook is invoked (e.g., to clear the map).
 * @returns A hook that maps a processor table to its counting counterpart; the context argument is ignored.
 */
export function instrumentDataflowCount(countMap: Map<RType, number>, reset: (map: Map<RType, number>) => void): (extractor: DataflowProcessors<ParentInformation>, ctx: FlowrAnalyzerContext) => DataflowProcessors<ParentInformation> {
	return (extractor, _ctx) => {
		reset(countMap);
		const wrapped = {} as DataflowProcessors<ParentInformation>;
		const entries = Object.entries(extractor) as [RType, (...args: unknown[]) => DataflowInformation][];
		entries.forEach(([type, original]) => {
			// count first, then hand over to the untouched processor
			const counting = (...args: unknown[]): DataflowInformation => {
				countMap.set(type, (countMap.get(type) ?? 0) + 1);
				return original(...args);
			};
			wrapped[type as RNode['type']] = counting as never;
		});
		return wrapped;
	};
}

src/documentation/wiki-interface.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,8 @@ ${codeBlock('json', JSON.stringify(
252252
}
253253
},
254254
repl: {
255-
quickStats: false
255+
quickStats: false,
256+
dfProcessorHeat: false
256257
},
257258
project: {
258259
resolveUnknownPathsOnDisk: true
@@ -268,7 +269,8 @@ ${codeBlock('json', JSON.stringify(
268269
inferWorkingDirectory: InferWorkingDirectory.ActiveScript,
269270
searchPath: []
270271
},
271-
slicer: {
272+
instrument: {},
273+
slicer: {
272274
threshold: 50
273275
}
274276
},

src/documentation/wiki-query.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ ${
533533
}
534534
535535
One of the most useful options to change on-the-fly are probably those under \`repl\`. For example, setting \`repl.quickStats=true\`
536-
enables quick statistics after each REPL command.
536+
enables quick statistics after each REPL command. Likewise, setting \`repl.dfProcessorHeat=true\` enables the dataflow processor heatmap after each REPL command.
537537
`;
538538
}
539539
});

src/queries/catalog/config-query/config-query-format.ts

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import Joi from 'joi';
66
import type { FlowrConfigOptions } from '../../../config';
77
import { jsonReplacer } from '../../../util/json';
88
import type { DeepPartial } from 'ts-essentials';
9-
import type { ParsedQueryLine, SupportedQuery } from '../../query';
9+
import type { ParsedQueryLine, Query, SupportedQuery } from '../../query';
1010
import type { ReplOutput } from '../../../cli/repl/commands/repl-main';
1111
import type { CommandCompletions } from '../../../cli/repl/core';
1212

@@ -83,12 +83,49 @@ function configQueryLineParser(output: ReplOutput, line: readonly string[], _con
8383
};
8484
}
8585

86+
/**
 * Collects the dot-separated paths of all leaf keys contained in a (partial) config update.
 *
 * A value counts as a leaf if it is not a non-empty, non-array object. In particular, arrays,
 * `null`, primitives, and *empty* objects are leaves, so an update such as `{ instrument: {} }`
 * still reports `instrument` instead of silently vanishing from the result.
 *
 * @param update - The (partial) configuration update to inspect.
 * @param prefix - Path of the parent object; used to qualify the collected keys during recursion.
 * @returns The fully qualified leaf keys in the iteration order of `Object.entries`.
 */
function collectKeysFromUpdate(update: DeepPartial<FlowrConfigOptions>, prefix: string = ''): string[] {
	const keys: string[] = [];
	for(const [key, value] of Object.entries(update) as [string, unknown][]) {
		const fullKey = prefix ? `${prefix}.${key}` : key;
		// only descend if there is actually something to descend into, otherwise the key itself is the leaf
		const hasNestedKeys = typeof value === 'object' && value !== null && !Array.isArray(value) && Object.keys(value).length > 0;
		if(hasNestedKeys) {
			keys.push(...collectKeysFromUpdate(value as DeepPartial<FlowrConfigOptions>, fullKey));
		} else {
			keys.push(fullKey);
		}
	}
	return keys;
}
99+
100+
/**
 * Resolves the value stored under the given key path within an object.
 *
 * Only own properties are followed, so path segments such as `toString` or `constructor`
 * do not resolve to members inherited from the prototype chain.
 *
 * @param obj  - The object to start the lookup from.
 * @param path - The keys to follow, outermost first; an empty path yields `obj` itself.
 * @returns The value at the path, or `undefined` if a segment is missing or an
 *          intermediate value is not an object.
 */
function getValueAtPath(obj: object, path: readonly string[]): unknown {
	let current: unknown = obj;
	for(const key of path) {
		if(typeof current !== 'object' || current === null || !Object.prototype.hasOwnProperty.call(current, key)) {
			return undefined;
		}
		current = (current as Record<string, unknown>)[key];
	}
	return current;
}
111+
86112
export const ConfigQueryDefinition = {
87113
executor: executeConfigQuery,
88-
asciiSummarizer: (formatter: OutputFormatter, _analyzer: unknown, queryResults: BaseQueryResult, result: string[]) => {
114+
asciiSummarizer: (formatter: OutputFormatter, _analyzer: unknown, queryResults: BaseQueryResult, result: string[], queries: readonly Query[]) => {
89115
const out = queryResults as ConfigQueryResult;
90116
result.push(`Query: ${bold('config', formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
91-
result.push(` ╰ Config:\n${JSON.stringify(out.config, jsonReplacer, 4)}`);
117+
const configQueries = queries.filter(q => q.type === 'config');
118+
if(configQueries.some(q => q.update)) {
119+
const updatedKeys = configQueries.flatMap(q => q.update ? collectKeysFromUpdate(q.update) : []);
120+
result.push(' ╰ Updated configuration:');
121+
for(const key of updatedKeys) {
122+
const path = key.split('.');
123+
const newValue = getValueAtPath(out.config, path);
124+
result.push(` - ${key}: ${JSON.stringify(newValue, jsonReplacer)}`);
125+
}
126+
} else {
127+
result.push(` ╰ Config:\n${JSON.stringify(out.config, jsonReplacer, 4)}`);
128+
}
92129
return true;
93130
},
94131
completer: configReplCompleter,

0 commit comments

Comments
 (0)