diff --git a/src/cli/repl/core.ts b/src/cli/repl/core.ts index 000b5af4b6a..fe67911e176 100644 --- a/src/cli/repl/core.ts +++ b/src/cli/repl/core.ts @@ -20,6 +20,8 @@ import type { FlowrConfigOptions } from '../../config'; import { genericWrapReplFailIfNoRequest, SupportedQueries, type SupportedQuery } from '../../queries/query'; import type { FlowrAnalyzer } from '../../project/flowr-analyzer'; import { startAndEndsWith } from '../../util/text/strings'; +import type { RType } from '../../r-bridge/lang-4.x/ast/model/type'; +import { instrumentDataflowCount } from '../../dataflow/instrument/instrument-dataflow-count'; let _replCompleterKeywords: string[] | undefined = undefined; function replCompleterKeywords() { @@ -128,6 +130,10 @@ export function handleString(code: string) { async function replProcessStatement(output: ReplOutput, statement: string, analyzer: FlowrAnalyzer, allowRSessionAccess: boolean): Promise { const time = Date.now(); + const heatMap = new Map(); + if(analyzer.inspectContext().config.repl.dfProcessorHeat) { + analyzer.context().config.solver.instrument.dataflowExtractors = instrumentDataflowCount(heatMap, map => map.clear()); + } if(statement.startsWith(':')) { const command = statement.slice(1).split(' ')[0].toLowerCase(); const processor = getCommand(command); @@ -179,6 +185,24 @@ async function replProcessStatement(output: ReplOutput, statement: string, analy // do nothing, this is just a nice-to-have } } + if(heatMap.size > 0 && analyzer.inspectContext().config.repl.dfProcessorHeat) { + const sorted = Array.from(heatMap.entries()).sort((a, b) => b[1] - a[1]); + console.log(output.formatter.format('[REPL Stats] Dataflow Processor Heatmap:', { + style: FontStyles.Italic, + effect: ColorEffect.Foreground, + color: Colors.White + })); + const longestKey = Math.max(...Array.from(heatMap.keys(), k => k.length)); + const longestValue = Math.max(...Array.from(heatMap.values(), v => v.toString().length)); + for(const [rType, count] of sorted) { + 
console.log(output.formatter.format(` - ${(rType + ':').padEnd(longestKey + 1, ' ')} ${count.toString().padStart(longestValue, ' ')}`, { + style: FontStyles.Italic, + effect: ColorEffect.Foreground, + color: Colors.White + })); + } + } + } /** diff --git a/src/config.ts b/src/config.ts index bd2c77f507c..981a85f1758 100644 --- a/src/config.ts +++ b/src/config.ts @@ -7,6 +7,9 @@ import Joi from 'joi'; import type { BuiltInDefinitions } from './dataflow/environments/built-in-config'; import type { KnownParser } from './r-bridge/parser'; import type { DeepWritable } from 'ts-essentials'; +import type { DataflowProcessors } from './dataflow/processor'; +import type { ParentInformation } from './r-bridge/lang-4.x/ast/model/processing/decorate'; +import type { FlowrAnalyzerContext } from './project/context/flowr-analyzer-context'; export enum VariableResolve { /** Don't resolve constants at all */ @@ -110,7 +113,9 @@ export interface FlowrConfigOptions extends MergeableRecord { /** Configuration options for the REPL */ readonly repl: { /** Whether to show quick stats in the REPL after each evaluation */ - quickStats: boolean + quickStats: boolean + /** This instruments the dataflow processors to count how often each processor is called */ + dfProcessorHeat: boolean; } readonly project: { /** Whether to resolve unknown paths loaded by the r project disk when trying to source/analyze files */ @@ -146,6 +151,15 @@ export interface FlowrConfigOptions extends MergeableRecord { */ readonly maxIndexCount: number }, + /** These keys are only intended for use within code, allowing to instrument the dataflow analyzer! */ + readonly instrument: { + /** + * Modify the dataflow processors used during dataflow analysis. + * Make sure that all processors required for correct analysis are still present! + * This may have arbitrary consequences on the analysis precision and performance, consider focusing on decorating existing processors instead of replacing them. 
+ */ + dataflowExtractors?: (extractor: DataflowProcessors, ctx: FlowrAnalyzerContext) => DataflowProcessors + }, /** * If lax source calls are active, flowR searches for sourced files much more freely, * based on the configurations you give it. @@ -238,7 +252,8 @@ export const defaultConfigOptions: FlowrConfigOptions = { } }, repl: { - quickStats: false + quickStats: false, + dfProcessorHeat: false }, project: { resolveUnknownPathsOnDisk: true @@ -256,6 +271,9 @@ export const defaultConfigOptions: FlowrConfigOptions = { searchPath: [], repeatedSourceLimit: 2 }, + instrument: { + dataflowExtractors: undefined + }, slicer: { threshold: 50 } @@ -283,7 +301,8 @@ export const flowrConfigFileSchema = Joi.object({ }).optional().description('Semantics regarding how to handle the R environment.') }).description('Configure language semantics and how flowR handles them.'), repl: Joi.object({ - quickStats: Joi.boolean().optional().description('Whether to show quick stats in the REPL after each evaluation.') + quickStats: Joi.boolean().optional().description('Whether to show quick stats in the REPL after each evaluation.'), + dfProcessorHeat: Joi.boolean().optional().description('This instruments the dataflow processors to count how often each processor is called.') }).description('Configuration options for the REPL.'), project: Joi.object({ resolveUnknownPathsOnDisk: Joi.boolean().optional().description('Whether to resolve unknown paths loaded by the r project disk when trying to source/analyze files.') @@ -310,6 +329,9 @@ export const flowrConfigFileSchema = Joi.object({ maxIndexCount: Joi.number().required().description('The maximum number of indices tracked per object with the pointer analysis.') }) ).description('Whether to track pointers in the dataflow graph, if not, the graph will be over-approximated wrt. 
containers and accesses.'), + instrument: Joi.object({ + dataflowExtractors: Joi.any().optional().description('These keys are only intended for use within code, allowing to instrument the dataflow analyzer!') + }), resolveSource: Joi.object({ dropPaths: Joi.string().valid(...Object.values(DropPathsOption)).description('Allow to drop the first or all parts of the sourced path, if it is relative.'), ignoreCapitalization: Joi.boolean().description('Search for filenames matching in the lowercase.'), diff --git a/src/dataflow/extractor.ts b/src/dataflow/extractor.ts index d8720d98df1..f42d45af832 100644 --- a/src/dataflow/extractor.ts +++ b/src/dataflow/extractor.ts @@ -96,7 +96,7 @@ function resolveLinkToSideEffects(ast: NormalizedAst, graph: DataflowGraph) { continue; } /* this has to change whenever we add a new link to relations because we currently offer no abstraction for the type */ - const potentials = identifyLinkToLastCallRelation(s.id, cf.graph, graph, s.linkTo, knownCalls); + const potentials = identifyLinkToLastCallRelation(s.id, cf?.graph, graph, s.linkTo, knownCalls); for(const pot of potentials) { graph.addEdge(s.id, pot, EdgeType.Reads); } @@ -129,7 +129,7 @@ export function produceDataFlowGraph( parser, completeAst, environment: ctx.env.makeCleanEnv(), - processors, + processors: ctx.config.solver.instrument.dataflowExtractors?.(processors, ctx) ?? 
/**
 * Builds a hook that wraps every dataflow processor with an invocation counter.
 *
 * The returned function receives the current set of processors and hands back a new
 * set in which each processor first increments its entry in `countMap` (keyed by the
 * processor's key in the input record) and then delegates to the original processor
 * with unchanged arguments.
 * @param countMap - Out parameter that receives the per-key invocation counts.
 * @param reset    - Called with `countMap` each time the returned hook is applied, before any wrapping happens.
 * @returns A function mapping a set of processors to its counting counterpart; the context argument is not used.
 */
export function instrumentDataflowCount(countMap: Map<RType, number>, reset: (map: Map<RType, number>) => void): (extractor: DataflowProcessors<ParentInformation>, ctx: FlowrAnalyzerContext) => DataflowProcessors<ParentInformation> {
	type AnyProcessor = (...args: unknown[]) => DataflowInformation;
	return (extractor, _ctx) => {
		// let the caller decide what a "fresh" map looks like before counting starts
		reset(countMap);
		const wrapped = {} as DataflowProcessors<ParentInformation>;
		const entries = Object.entries(extractor) as [RType, AnyProcessor][];
		entries.forEach(([type, original]) => {
			const counting = (...args: unknown[]) => {
				countMap.set(type, (countMap.get(type) ?? 0) + 1);
				return original(...args);
			};
			wrapped[type as RNode['type']] = counting as never;
		});
		return wrapped;
	};
}
/**
 * Collects the dot-separated paths of all leaf entries of `update`.
 *
 * Non-null, non-array objects are descended into recursively; every other value
 * (primitives, `null`, arrays) counts as a leaf and contributes exactly one path.
 * @param update - The (partial) configuration update to inspect.
 * @param prefix - Path of the enclosing object, prepended with a `.` separator when non-empty.
 * @returns The leaf paths in the iteration order of `Object.entries`.
 */
function collectKeysFromUpdate(update: DeepPartial<FlowrConfigOptions>, prefix: string = ''): string[] {
	const keys: string[] = [];
	for(const [key, value] of Object.entries(update)) {
		const fullKey = prefix ? `${prefix}.${key}` : key;
		if(value && typeof value === 'object' && !Array.isArray(value)) {
			// nested record: only its leaves are reported, never the record itself
			keys.push(...collectKeysFromUpdate(value as DeepPartial<FlowrConfigOptions>, fullKey));
		} else {
			keys.push(fullKey);
		}
	}
	return keys;
}

/**
 * Follows `path` through nested objects starting at `obj`.
 *
 * Only own properties are followed, so inherited members such as `constructor`
 * or `toString` are never reported as values.
 * @param obj  - The object to start from.
 * @param path - The property names to follow, outermost first; an empty path yields `obj` itself.
 * @returns The value found at the end of the path, or `undefined` if any step is missing or not an object.
 */
function getValueAtPath(obj: object, path: string[]): unknown {
	let current: unknown = obj;
	for(const key of path) {
		if(current === null || typeof current !== 'object' || !Object.prototype.hasOwnProperty.call(current, key)) {
			return undefined;
		}
		current = (current as Record<string, unknown>)[key];
	}
	return current;
}