From 6e35d91f79fc9ad4e6bfa736a09f0008b205652c Mon Sep 17 00:00:00 2001 From: Matt Godbolt Date: Thu, 16 Oct 2025 13:47:02 -0500 Subject: [PATCH 1/7] Add dispatch blocking information to JSON output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tracks why ready uops don't dispatch each cycle, emitting blockedFromDispatch events when blocking reasons change. This helps visualize and understand pipeline bottlenecks beyond basic port contention. Blocking reasons tracked: - port_busy_older_uop: Older uop dispatched to the port instead - port_blocked_resource: Port blocked by microarchitectural constraint - port_removed_by_constraint: Port excluded by paired stores or 256-bit restrictions Uses BlockingEvent NamedTuple following codebase patterns. Only emits events when the blocking reason changes to minimize JSON size. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uiCA.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/uiCA.py b/uiCA.py index 45fd22e..96a0314 100755 --- a/uiCA.py +++ b/uiCA.py @@ -8,7 +8,7 @@ from concurrent import futures from heapq import heappop, heappush from itertools import count, repeat -from typing import List, Dict, NamedTuple, Optional +from typing import List, Dict, NamedTuple, Optional, Tuple, Any import random random.seed(0) @@ -832,6 +832,7 @@ def __init__(self, uArchConfig: MicroArchConfig): self.blockedResources = dict() # for how many remaining cycle a resource will be blocked self.blockedResources['div'] = 0 self.dependentUops = dict() # uops that have an operand that is written by a non-executed uop + self.blockingInfo: List[BlockingEvent] = [] # tracks why ready uops don't dispatch each cycle def isFull(self): return len(self.uops) + self.uArchConfig.issueWidth > self.uArchConfig.RSWidth @@ -880,6 +881,9 @@ def dispatchUops(self, clock): and ((not self.readyQueue['0']) or 
self.readyDivUops[0][0] < self.readyQueue['0'][0][0])): queue = self.readyDivUops if self.blockedResources.get('port' + port): + # Record all uops in this port's queue as blocked by resource + for _, uop in queue: + self.blockingInfo.append(BlockingEvent(clock, uop, 'port_blocked_resource', {'port': port})) continue if not queue: continue @@ -891,10 +895,24 @@ def dispatchUops(self, clock): uopsDispatched.append(uop) self.pendingUops.add(uop) + # Record that remaining uops in queue were passed over by an older uop + for _, remaining_uop in queue: + self.blockingInfo.append(BlockingEvent(clock, remaining_uop, 'port_busy_older_uop', { + 'port': port, + 'dispatchedInstead': uop + })) + self.blockedResources['div'] += uop.prop.divCycles if self.uArchConfig.slow256BitMemAcc and (port == '4') and ('M256' in uop.instrI.instr.instrStr): self.blockedResources['port' + port] = 2 + # Check for uops on ports that were removed from applicablePorts + allPortsList = list(allPorts[self.uArchConfig.name]) + for port in allPortsList: + if port not in applicablePorts: + for _, uop in self.readyQueue[port]: + self.blockingInfo.append(BlockingEvent(clock, uop, 'port_removed_by_constraint', {'port': port})) + for uop in self.uopsDispatchedInPrevCycle: self.portUsage[uop.actualPort] -= 1 self.uopsDispatchedInPrevCycle = uopsDispatched @@ -1441,6 +1459,7 @@ def canBeInDSB(block, DSBBlockSize): return True +BlockingEvent = NamedTuple('BlockingEvent', [('clock', int), ('uop', 'Uop'), ('reason', str), ('details', Dict[str, Any])]) TableLineData = NamedTuple('TableLineData', [('string', str), ('instr', Optional[Instr]), ('url', Optional[str]), ('uopsForRnd', List[List[LaminatedUop]])]) def getUopsTableColumns(tableLineData: List[TableLineData], uArchConfig: MicroArchConfig): @@ -1795,7 +1814,7 @@ def addEvent(evtName, cycle, val=1): writeHtmlFile(filename, 'Graph', head, body, includeDOCTYPE=False) # if DOCTYPE is included, scaling doesn't work properly -def generateJSONOutput(filename, 
instructions: List[Instr], frontEnd: FrontEnd, uArchConfig: MicroArchConfig, maxCycle): +def generateJSONOutput(filename: str, instructions: List[Instr], frontEnd: FrontEnd, uArchConfig: MicroArchConfig, maxCycle: int, scheduler: 'Scheduler'): parameters = { 'uArchName': uArchConfig.name, 'IQWidth': uArchConfig.IQWidth, @@ -1886,6 +1905,40 @@ def generateJSONOutput(filename, instructions: List[Instr], frontEnd: FrontEnd, if (uop.executed is not None) and (uop.executed <= maxCycle): cycles[uop.executed].setdefault('executed', []).append(unfusedUopDict) + # Process blocking information from scheduler + # Group by uop, then deduplicate consecutive same-reason events + uopBlockingEvents: Dict[Uop, List[Tuple[int, str, Dict[str, Any]]]] = {} + for event in scheduler.blockingInfo: + if event.clock > maxCycle: + continue + if event.uop not in uopBlockingEvents: + uopBlockingEvents[event.uop] = [] + uopBlockingEvents[event.uop].append((event.clock, event.reason, event.details)) + + # Add to cycles, but only when reason changes from previous cycle + for uop, events in uopBlockingEvents.items(): + if uop not in unfusedUopToDict: + continue # Uop might not be in the tracked range + + lastReason = None + for clock, reason, details in events: + if reason != lastReason: + # Create blocking event dict based on uop's identity + blockingDict = unfusedUopToDict[uop].copy() + blockingDict['reason'] = reason + + # Add details (port, etc.) 
+ for key, value in details.items(): + if key == 'dispatchedInstead': + # Convert uop reference to dict + if value in unfusedUopToDict: + blockingDict['dispatchedInstead'] = unfusedUopToDict[value] + else: + blockingDict[key] = value + + cycles[clock].setdefault('blockedFromDispatch', []).append(blockingDict) + lastReason = reason + import json jsonStr = json.dumps({'parameters': parameters, 'instructions': instrList, 'cycles': cycles}, sort_keys=True) @@ -2003,7 +2056,7 @@ def runSimulation(disas, uArchConfig: MicroArchConfig, alignmentOffset, initPoli generateGraphvizOutputForLatencyGraph(instructions, nodesForInstr, edgesForNode, edgesOnMaxCycle, comp, depGraphFile) if jsonFile is not None: - generateJSONOutput(jsonFile, instructions, frontEnd, uArchConfig, clock-1) + generateJSONOutput(jsonFile, instructions, frontEnd, uArchConfig, clock-1, scheduler) return TP From 8852559ef68f9e41d950927a930a2962a4f60034 Mon Sep 17 00:00:00 2001 From: Matt Godbolt Date: Thu, 16 Oct 2025 14:03:44 -0500 Subject: [PATCH 2/7] Add issue blocking tracking to JSON output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the blocking information system to track why uops don't issue from the IDQ (instruction decode queue) each cycle. This complements the existing dispatch blocking tracking. Issue blocking reasons tracked: - issue_width_exceeded: Exceeded the CPU's issue width limit (4 on SKL) - register_merge_required: Waiting for register merge uops to be issued - serializing_instruction_waiting: Serializing instruction waiting for ROB to drain - reorder_buffer_full: ROB is full, preventing issue - reservation_station_full: RS is full, preventing issue The blocking events are emitted to the JSON output under the 'blockedFromIssue' array for each cycle. Like dispatch blocking, events are deduplicated to only emit when the blocking reason changes. 
This information enables visualizers (like CE's planned wavefront view) to explain why instructions are queued and unable to progress through the pipeline, without having to reverse-engineer the blocking reasons from dependencies and resource usage. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uiCA.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/uiCA.py b/uiCA.py index 96a0314..69560fd 100755 --- a/uiCA.py +++ b/uiCA.py @@ -168,6 +168,7 @@ def __init__(self, IDQ, reorderBuffer, uArchConfig: MicroArchConfig, initPolicy) self.storeBufferEntryDict = {} self.lastRegMergeIssued = None # last uop for which register merge uops were issued + self.blockingInfo: List[BlockingEvent] = [] # tracks why uops in IDQ don't issue each cycle def cycle(self): self.renamerActiveCycle += 1 @@ -187,12 +188,27 @@ def cycle(self): renamerUops.append(mergeUop) firstUnfusedUop.instrI.regMergeUops.append(LaminatedUop([mergeUop])) self.lastRegMergeIssued = firstUnfusedUop + # Record that remaining IDQ uops blocked by register merge requirement + for lamUop in self.IDQ: + for fUop in lamUop.getFusedUops(): + for uop in fUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'register_merge_required', {})) break if firstUnfusedUop.prop.isFirstUopOfInstr and firstUnfusedUop.prop.instr.isSerializingInstr and not self.reorderBuffer.isEmpty(): + # Record that this serializing instruction is blocked + for uop in lamUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'serializing_instruction_waiting', {})) break fusedUops = lamUop.getFusedUops() if len(renamerUops) + len(fusedUops) > self.uArchConfig.issueWidth: + # Record that this lamUop and all remaining IDQ uops blocked by issue width + for uop in lamUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 
'issue_width_exceeded', {})) + for remainingLamUop in list(self.IDQ)[1:]: # Skip first (already handled) + for fUop in remainingLamUop.getFusedUops(): + for uop in fUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) break renamerUops.extend(fusedUops) self.IDQ.popleft() @@ -408,6 +424,13 @@ def cycle(self, clock): issueUops = [] if not self.reorderBuffer.isFull() and not self.scheduler.isFull(): # len(self.IDQ) >= uArchConfig.issueWidth and the first check seems to be wrong, but leads to better results issueUops = self.renamer.cycle() + else: + # Record all uops in IDQ as blocked by RB or RS full + reason = 'reorder_buffer_full' if self.reorderBuffer.isFull() else 'reservation_station_full' + for lamUop in self.IDQ: + for fUop in lamUop.getFusedUops(): + for uop in fUop.getUnfusedUops(): + self.renamer.blockingInfo.append(BlockingEvent(clock, uop, reason, {})) for fusedUop in issueUops: fusedUop.issued = clock @@ -1814,7 +1837,7 @@ def addEvent(evtName, cycle, val=1): writeHtmlFile(filename, 'Graph', head, body, includeDOCTYPE=False) # if DOCTYPE is included, scaling doesn't work properly -def generateJSONOutput(filename: str, instructions: List[Instr], frontEnd: FrontEnd, uArchConfig: MicroArchConfig, maxCycle: int, scheduler: 'Scheduler'): +def generateJSONOutput(filename: str, instructions: List[Instr], frontEnd: FrontEnd, uArchConfig: MicroArchConfig, maxCycle: int, scheduler: 'Scheduler', renamer: 'Renamer'): parameters = { 'uArchName': uArchConfig.name, 'IQWidth': uArchConfig.IQWidth, @@ -1939,6 +1962,34 @@ def generateJSONOutput(filename: str, instructions: List[Instr], frontEnd: Front cycles[clock].setdefault('blockedFromDispatch', []).append(blockingDict) lastReason = reason + # Process issue blocking information from renamer + uopIssueBlockingEvents: Dict[Uop, List[Tuple[int, str, Dict[str, Any]]]] = {} + for event in renamer.blockingInfo: + if event.clock > maxCycle: + continue + if 
event.uop not in uopIssueBlockingEvents: + uopIssueBlockingEvents[event.uop] = [] + uopIssueBlockingEvents[event.uop].append((event.clock, event.reason, event.details)) + + # Add to cycles, but only when reason changes from previous cycle + for uop, events in uopIssueBlockingEvents.items(): + if uop not in unfusedUopToDict: + continue # Uop might not be in the tracked range + + lastReason = None + for clock, reason, details in events: + if reason != lastReason: + # Create blocking event dict based on uop's identity + blockingDict = unfusedUopToDict[uop].copy() + blockingDict['reason'] = reason + + # Add details if any + for key, value in details.items(): + blockingDict[key] = value + + cycles[clock].setdefault('blockedFromIssue', []).append(blockingDict) + lastReason = reason + import json jsonStr = json.dumps({'parameters': parameters, 'instructions': instrList, 'cycles': cycles}, sort_keys=True) @@ -2056,7 +2107,7 @@ def runSimulation(disas, uArchConfig: MicroArchConfig, alignmentOffset, initPoli generateGraphvizOutputForLatencyGraph(instructions, nodesForInstr, edgesForNode, edgesOnMaxCycle, comp, depGraphFile) if jsonFile is not None: - generateJSONOutput(jsonFile, instructions, frontEnd, uArchConfig, clock-1, scheduler) + generateJSONOutput(jsonFile, instructions, frontEnd, uArchConfig, clock-1, scheduler, frontEnd.renamer) return TP From 5aa1cbe39def558f571b3d707baa5851d99d3e77 Mon Sep 17 00:00:00 2001 From: Matt Godbolt Date: Fri, 17 Oct 2025 08:36:29 -0500 Subject: [PATCH 3/7] Add front-end blocking tracking to JSON output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the blocking information system to track why instructions can't be decoded from the instruction queue (IQ) into the IDQ (instruction decode queue) each cycle. This completes pipeline blocking coverage alongside the existing issue and dispatch blocking tracking. 
Front-end blocking reasons tracked: - idq_full: IDQ is full, preventing further decode. This occurs when the backend (issue/execute) can't keep up with the front-end decode rate, causing the IDQ to fill up. The blocking events are emitted to the JSON output under the 'blockedFromDecode' array for each cycle. Instructions waiting in the IQ are reported along with the IDQ size at the time of blocking. Test case: A long dependency chain (60 inc rax instructions) triggers IDQ full conditions in 43% of cycles, demonstrating real front-end bottlenecks where decode stalls because the backend is slow. This information enables visualizers (like CE's planned wavefront view) to explain why instructions in the predecode (P) state aren't progressing to the queue (Q) state, completing the pipeline blocking picture: P→Q→I→r→D→E→R. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uiCA.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/uiCA.py b/uiCA.py index 69560fd..6a3217b 100755 --- a/uiCA.py +++ b/uiCA.py @@ -373,6 +373,7 @@ def __init__(self, instructions: List[Instr], reorderBuffer, scheduler, uArchCon self.unroll = unroll self.alignmentOffset = alignmentOffset self.perfEvents = perfEvents + self.blockingInfo: List[InstructionBlockingEvent] = [] # tracks why instructions can't be decoded each cycle self.MS = MicrocodeSequencer(self.uArchConfig) @@ -447,6 +448,9 @@ def cycle(self, clock): if len(self.IDQ) + self.uArchConfig.DSBWidth > self.uArchConfig.IDQWidth: self.perfEvents.setdefault(clock, {})['IDQFull'] = 1 + # Record all instructions in instruction queue as blocked from decode + for instrI in self.instructionQueue: + self.blockingInfo.append(InstructionBlockingEvent(clock, instrI, 'idq_full', {'idqSize': len(self.IDQ)})) return if self.uopSource is None: @@ -1483,6 +1487,7 @@ def canBeInDSB(block, DSBBlockSize): BlockingEvent = NamedTuple('BlockingEvent', [('clock', int), ('uop', 'Uop'), 
('reason', str), ('details', Dict[str, Any])]) +InstructionBlockingEvent = NamedTuple('InstructionBlockingEvent', [('clock', int), ('instrInstance', 'InstrInstance'), ('reason', str), ('details', Dict[str, Any])]) TableLineData = NamedTuple('TableLineData', [('string', str), ('instr', Optional[Instr]), ('url', Optional[str]), ('uopsForRnd', List[List[LaminatedUop]])]) def getUopsTableColumns(tableLineData: List[TableLineData], uArchConfig: MicroArchConfig): @@ -1990,6 +1995,30 @@ def generateJSONOutput(filename: str, instructions: List[Instr], frontEnd: Front cycles[clock].setdefault('blockedFromIssue', []).append(blockingDict) lastReason = reason + # Process front-end blocking information + # Group by instruction instance, then deduplicate consecutive same-reason events + instrBlockingEvents: Dict['InstrInstance', List[Tuple[int, str, Dict[str, Any]]]] = {} + for event in frontEnd.blockingInfo: + if event.clock > maxCycle: + continue + if event.instrInstance not in instrBlockingEvents: + instrBlockingEvents[event.instrInstance] = [] + instrBlockingEvents[event.instrInstance].append((event.clock, event.reason, event.details)) + + # Add to cycles, but only when reason changes from previous cycle + for instrI, events in instrBlockingEvents.items(): + instrID = instrToID[instrI.instr] + rnd = instrI.rnd + lastReason = None + for clock, reason, details in events: + if reason != lastReason: + blockingDict = {'instrID': instrID, 'rnd': rnd} + blockingDict['reason'] = reason + for key, value in details.items(): + blockingDict[key] = value + cycles[clock].setdefault('blockedFromDecode', []).append(blockingDict) + lastReason = reason + import json jsonStr = json.dumps({'parameters': parameters, 'instructions': instrList, 'cycles': cycles}, sort_keys=True) From 95ff4117703df1f840da77358487eec9783280e7 Mon Sep 17 00:00:00 2001 From: Matt Godbolt Date: Fri, 17 Oct 2025 09:48:00 -0500 Subject: [PATCH 4/7] Refactor blocking event code for style consistency MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve code consistency with original codebase conventions: - Move imports to module level - Use namedtuple with defaults instead of NamedTuple - Remove type annotations from instance variables - Simplify nested loops using existing getUnfusedUops() - Extract duplicate event processing into helper function Reduces code by 32 lines while maintaining all functionality. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uiCA.py | 156 ++++++++++++++++++++++---------------------------------- 1 file changed, 62 insertions(+), 94 deletions(-) diff --git a/uiCA.py b/uiCA.py index 6a3217b..3c2c04f 100755 --- a/uiCA.py +++ b/uiCA.py @@ -2,6 +2,7 @@ import argparse import importlib +import json import os import re from collections import Counter, deque, namedtuple, OrderedDict @@ -168,7 +169,7 @@ def __init__(self, IDQ, reorderBuffer, uArchConfig: MicroArchConfig, initPolicy) self.storeBufferEntryDict = {} self.lastRegMergeIssued = None # last uop for which register merge uops were issued - self.blockingInfo: List[BlockingEvent] = [] # tracks why uops in IDQ don't issue each cycle + self.blockingInfo = [] # tracks why uops in IDQ don't issue each cycle def cycle(self): self.renamerActiveCycle += 1 @@ -190,25 +191,23 @@ def cycle(self): self.lastRegMergeIssued = firstUnfusedUop # Record that remaining IDQ uops blocked by register merge requirement for lamUop in self.IDQ: - for fUop in lamUop.getFusedUops(): - for uop in fUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'register_merge_required', {})) + for uop in lamUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'register_merge_required')) break if firstUnfusedUop.prop.isFirstUopOfInstr and firstUnfusedUop.prop.instr.isSerializingInstr and not self.reorderBuffer.isEmpty(): # Record that this serializing instruction is blocked 
for uop in lamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'serializing_instruction_waiting', {})) + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'serializing_instruction_waiting')) break fusedUops = lamUop.getFusedUops() if len(renamerUops) + len(fusedUops) > self.uArchConfig.issueWidth: # Record that this lamUop and all remaining IDQ uops blocked by issue width for uop in lamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded')) for remainingLamUop in list(self.IDQ)[1:]: # Skip first (already handled) - for fUop in remainingLamUop.getFusedUops(): - for uop in fUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) + for uop in remainingLamUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded')) break renamerUops.extend(fusedUops) self.IDQ.popleft() @@ -373,7 +372,7 @@ def __init__(self, instructions: List[Instr], reorderBuffer, scheduler, uArchCon self.unroll = unroll self.alignmentOffset = alignmentOffset self.perfEvents = perfEvents - self.blockingInfo: List[InstructionBlockingEvent] = [] # tracks why instructions can't be decoded each cycle + self.blockingInfo = [] # tracks why instructions can't be decoded each cycle self.MS = MicrocodeSequencer(self.uArchConfig) @@ -429,9 +428,8 @@ def cycle(self, clock): # Record all uops in IDQ as blocked by RB or RS full reason = 'reorder_buffer_full' if self.reorderBuffer.isFull() else 'reservation_station_full' for lamUop in self.IDQ: - for fUop in lamUop.getFusedUops(): - for uop in fUop.getUnfusedUops(): - self.renamer.blockingInfo.append(BlockingEvent(clock, uop, reason, {})) + for uop in lamUop.getUnfusedUops(): + 
self.renamer.blockingInfo.append(BlockingEvent(clock, uop, reason)) for fusedUop in issueUops: fusedUop.issued = clock @@ -859,7 +857,7 @@ def __init__(self, uArchConfig: MicroArchConfig): self.blockedResources = dict() # for how many remaining cycle a resource will be blocked self.blockedResources['div'] = 0 self.dependentUops = dict() # uops that have an operand that is written by a non-executed uop - self.blockingInfo: List[BlockingEvent] = [] # tracks why ready uops don't dispatch each cycle + self.blockingInfo = [] # tracks why ready uops don't dispatch each cycle def isFull(self): return len(self.uops) + self.uArchConfig.issueWidth > self.uArchConfig.RSWidth @@ -1370,6 +1368,9 @@ def __generateUops(self): return laminatedDomainUops +BlockingEvent = namedtuple('BlockingEvent', ['clock', 'uop', 'reason', 'details'], defaults=[{}]) +InstructionBlockingEvent = namedtuple('InstructionBlockingEvent', ['clock', 'instrInstance', 'reason', 'details'], defaults=[{}]) + def split64ByteBlockTo16ByteBlocks(cacheBlock): return [[ii for ii in cacheBlock if b*16 <= ii.address % 64 < (b+1)*16 ] for b in range(0,4)] @@ -1486,8 +1487,6 @@ def canBeInDSB(block, DSBBlockSize): return True -BlockingEvent = NamedTuple('BlockingEvent', [('clock', int), ('uop', 'Uop'), ('reason', str), ('details', Dict[str, Any])]) -InstructionBlockingEvent = NamedTuple('InstructionBlockingEvent', [('clock', int), ('instrInstance', 'InstrInstance'), ('reason', str), ('details', Dict[str, Any])]) TableLineData = NamedTuple('TableLineData', [('string', str), ('instr', Optional[Instr]), ('url', Optional[str]), ('uopsForRnd', List[List[LaminatedUop]])]) def getUopsTableColumns(tableLineData: List[TableLineData], uArchConfig: MicroArchConfig): @@ -1842,7 +1841,28 @@ def addEvent(evtName, cycle, val=1): writeHtmlFile(filename, 'Graph', head, body, includeDOCTYPE=False) # if DOCTYPE is included, scaling doesn't work properly -def generateJSONOutput(filename: str, instructions: List[Instr], frontEnd: 
FrontEnd, uArchConfig: MicroArchConfig, maxCycle: int, scheduler: 'Scheduler', renamer: 'Renamer'): +def _groupAndDeduplicateBlockingEvents(events, maxCycle, getKey): + grouped = {} + for event in events: + if event.clock > maxCycle: + continue + key = getKey(event) + if key not in grouped: + grouped[key] = [] + grouped[key].append((event.clock, event.reason, event.details)) + + deduplicated = {} + for key, eventList in grouped.items(): + deduplicated[key] = [] + lastReason = None + for clock, reason, details in eventList: + if reason != lastReason: + deduplicated[key].append((clock, reason, details)) + lastReason = reason + + return deduplicated + +def generateJSONOutput(filename, instructions, frontEnd, uArchConfig, maxCycle, scheduler, renamer): parameters = { 'uArchName': uArchConfig.name, 'IQWidth': uArchConfig.IQWidth, @@ -1933,93 +1953,41 @@ def generateJSONOutput(filename: str, instructions: List[Instr], frontEnd: Front if (uop.executed is not None) and (uop.executed <= maxCycle): cycles[uop.executed].setdefault('executed', []).append(unfusedUopDict) - # Process blocking information from scheduler - # Group by uop, then deduplicate consecutive same-reason events - uopBlockingEvents: Dict[Uop, List[Tuple[int, str, Dict[str, Any]]]] = {} - for event in scheduler.blockingInfo: - if event.clock > maxCycle: - continue - if event.uop not in uopBlockingEvents: - uopBlockingEvents[event.uop] = [] - uopBlockingEvents[event.uop].append((event.clock, event.reason, event.details)) - - # Add to cycles, but only when reason changes from previous cycle - for uop, events in uopBlockingEvents.items(): + # Process scheduler blocking events + for uop, events in _groupAndDeduplicateBlockingEvents(scheduler.blockingInfo, maxCycle, lambda e: e.uop).items(): if uop not in unfusedUopToDict: - continue # Uop might not be in the tracked range - - lastReason = None - for clock, reason, details in events: - if reason != lastReason: - # Create blocking event dict based on uop's 
identity - blockingDict = unfusedUopToDict[uop].copy() - blockingDict['reason'] = reason - - # Add details (port, etc.) - for key, value in details.items(): - if key == 'dispatchedInstead': - # Convert uop reference to dict - if value in unfusedUopToDict: - blockingDict['dispatchedInstead'] = unfusedUopToDict[value] - else: - blockingDict[key] = value - - cycles[clock].setdefault('blockedFromDispatch', []).append(blockingDict) - lastReason = reason - - # Process issue blocking information from renamer - uopIssueBlockingEvents: Dict[Uop, List[Tuple[int, str, Dict[str, Any]]]] = {} - for event in renamer.blockingInfo: - if event.clock > maxCycle: continue - if event.uop not in uopIssueBlockingEvents: - uopIssueBlockingEvents[event.uop] = [] - uopIssueBlockingEvents[event.uop].append((event.clock, event.reason, event.details)) - - # Add to cycles, but only when reason changes from previous cycle - for uop, events in uopIssueBlockingEvents.items(): - if uop not in unfusedUopToDict: - continue # Uop might not be in the tracked range - - lastReason = None for clock, reason, details in events: - if reason != lastReason: - # Create blocking event dict based on uop's identity - blockingDict = unfusedUopToDict[uop].copy() - blockingDict['reason'] = reason - - # Add details if any - for key, value in details.items(): + blockingDict = unfusedUopToDict[uop].copy() + blockingDict['reason'] = reason + for key, value in details.items(): + if key == 'dispatchedInstead' and value in unfusedUopToDict: + blockingDict['dispatchedInstead'] = unfusedUopToDict[value] + else: blockingDict[key] = value + cycles[clock].setdefault('blockedFromDispatch', []).append(blockingDict) - cycles[clock].setdefault('blockedFromIssue', []).append(blockingDict) - lastReason = reason - - # Process front-end blocking information - # Group by instruction instance, then deduplicate consecutive same-reason events - instrBlockingEvents: Dict['InstrInstance', List[Tuple[int, str, Dict[str, Any]]]] = {} - for 
event in frontEnd.blockingInfo: - if event.clock > maxCycle: + # Process renamer blocking events + for uop, events in _groupAndDeduplicateBlockingEvents(renamer.blockingInfo, maxCycle, lambda e: e.uop).items(): + if uop not in unfusedUopToDict: continue - if event.instrInstance not in instrBlockingEvents: - instrBlockingEvents[event.instrInstance] = [] - instrBlockingEvents[event.instrInstance].append((event.clock, event.reason, event.details)) - - # Add to cycles, but only when reason changes from previous cycle - for instrI, events in instrBlockingEvents.items(): + for clock, reason, details in events: + blockingDict = unfusedUopToDict[uop].copy() + blockingDict['reason'] = reason + for key, value in details.items(): + blockingDict[key] = value + cycles[clock].setdefault('blockedFromIssue', []).append(blockingDict) + + # Process front-end blocking events + for instrI, events in _groupAndDeduplicateBlockingEvents(frontEnd.blockingInfo, maxCycle, lambda e: e.instrInstance).items(): instrID = instrToID[instrI.instr] rnd = instrI.rnd - lastReason = None for clock, reason, details in events: - if reason != lastReason: - blockingDict = {'instrID': instrID, 'rnd': rnd} - blockingDict['reason'] = reason - for key, value in details.items(): - blockingDict[key] = value - cycles[clock].setdefault('blockedFromDecode', []).append(blockingDict) - lastReason = reason + blockingDict = {'instrID': instrID, 'rnd': rnd, 'reason': reason} + for key, value in details.items(): + blockingDict[key] = value + cycles[clock].setdefault('blockedFromDecode', []).append(blockingDict) - import json jsonStr = json.dumps({'parameters': parameters, 'instructions': instrList, 'cycles': cycles}, sort_keys=True) with open(filename, 'w') as f: From c179cef5717e024a6e499391072920e763105f9c Mon Sep 17 00:00:00 2001 From: Matt Godbolt Date: Fri, 17 Oct 2025 12:32:13 -0500 Subject: [PATCH 5/7] Remove unnecessary style changes for upstream PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit - Move json import to local scope in generateJSONOutput (matches existing style) - Remove unused Tuple and Any type imports - Remove namedtuple defaults parameter (requires Python 3.7+) - Add explicit empty dict arguments to maintain compatibility with Python 3.6 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uiCA.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/uiCA.py b/uiCA.py index 3c2c04f..baf4094 100755 --- a/uiCA.py +++ b/uiCA.py @@ -2,14 +2,13 @@ import argparse import importlib -import json import os import re from collections import Counter, deque, namedtuple, OrderedDict from concurrent import futures from heapq import heappop, heappush from itertools import count, repeat -from typing import List, Dict, NamedTuple, Optional, Tuple, Any +from typing import List, Dict, NamedTuple, Optional import random random.seed(0) @@ -192,22 +191,22 @@ def cycle(self): # Record that remaining IDQ uops blocked by register merge requirement for lamUop in self.IDQ: for uop in lamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'register_merge_required')) + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'register_merge_required', {})) break if firstUnfusedUop.prop.isFirstUopOfInstr and firstUnfusedUop.prop.instr.isSerializingInstr and not self.reorderBuffer.isEmpty(): # Record that this serializing instruction is blocked for uop in lamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'serializing_instruction_waiting')) + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'serializing_instruction_waiting', {})) break fusedUops = lamUop.getFusedUops() if len(renamerUops) + len(fusedUops) > self.uArchConfig.issueWidth: # Record that this lamUop and all remaining IDQ uops blocked by issue width for uop in lamUop.getUnfusedUops(): - 
self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded')) + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) for remainingLamUop in list(self.IDQ)[1:]: # Skip first (already handled) for uop in remainingLamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded')) + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) break renamerUops.extend(fusedUops) self.IDQ.popleft() @@ -429,7 +428,7 @@ def cycle(self, clock): reason = 'reorder_buffer_full' if self.reorderBuffer.isFull() else 'reservation_station_full' for lamUop in self.IDQ: for uop in lamUop.getUnfusedUops(): - self.renamer.blockingInfo.append(BlockingEvent(clock, uop, reason)) + self.renamer.blockingInfo.append(BlockingEvent(clock, uop, reason, {})) for fusedUop in issueUops: fusedUop.issued = clock @@ -1368,8 +1367,8 @@ def __generateUops(self): return laminatedDomainUops -BlockingEvent = namedtuple('BlockingEvent', ['clock', 'uop', 'reason', 'details'], defaults=[{}]) -InstructionBlockingEvent = namedtuple('InstructionBlockingEvent', ['clock', 'instrInstance', 'reason', 'details'], defaults=[{}]) +BlockingEvent = namedtuple('BlockingEvent', ['clock', 'uop', 'reason', 'details']) +InstructionBlockingEvent = namedtuple('InstructionBlockingEvent', ['clock', 'instrInstance', 'reason', 'details']) def split64ByteBlockTo16ByteBlocks(cacheBlock): return [[ii for ii in cacheBlock if b*16 <= ii.address % 64 < (b+1)*16 ] for b in range(0,4)] @@ -1863,6 +1862,7 @@ def _groupAndDeduplicateBlockingEvents(events, maxCycle, getKey): return deduplicated def generateJSONOutput(filename, instructions, frontEnd, uArchConfig, maxCycle, scheduler, renamer): + import json parameters = { 'uArchName': uArchConfig.name, 'IQWidth': uArchConfig.IQWidth, From 922142e4a34f2655bde1913b346db227726e73dc Mon Sep 17 00:00:00 2001 From: Matt Godbolt 
Date: Fri, 17 Oct 2025 16:25:33 -0500 Subject: [PATCH 6/7] Fix blocking event deduplication to preserve details MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deduplication logic was only comparing the 'reason' string, which caused loss of information when details differed between consecutive events. This particularly affected port_busy_older_uop events where different uops dispatch on consecutive cycles. Now compares both reason and details, preserving all events where either changes. This ensures dispatchedInstead information is not lost. Side effect: idq_full events will now record IDQ size fluctuations instead of collapsing to first occurrence. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uiCA.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/uiCA.py b/uiCA.py index baf4094..4eb3cf8 100755 --- a/uiCA.py +++ b/uiCA.py @@ -1854,10 +1854,12 @@ def _groupAndDeduplicateBlockingEvents(events, maxCycle, getKey): for key, eventList in grouped.items(): deduplicated[key] = [] lastReason = None + lastDetails = None for clock, reason, details in eventList: - if reason != lastReason: + if reason != lastReason or details != lastDetails: deduplicated[key].append((clock, reason, details)) lastReason = reason + lastDetails = details return deduplicated From af68562391e660a85066a180c839c711d5ba693a Mon Sep 17 00:00:00 2001 From: Matt Godbolt Date: Fri, 17 Oct 2025 16:48:08 -0500 Subject: [PATCH 7/7] Add -trackBlocking flag to gate blocking info collection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocking info collection has O(N) overhead in the scheduler dispatch loop. This change makes it optional via a command-line flag.
Changes: - Add -trackBlocking argument to command-line parser - Thread trackBlocking flag through FrontEnd, Renamer, Scheduler - Guard all blockingInfo.append() calls with if self.trackBlocking - Add 'blockingTracked' to JSON parameters for consumers When disabled (default), there is no blocking info overhead during simulation. When enabled, full blocking diagnostics are collected. JSON consumers can check parameters.blockingTracked to distinguish "no blocking events" from "tracking was disabled". 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uiCA.py | 89 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 38 deletions(-) diff --git a/uiCA.py b/uiCA.py index 4eb3cf8..10b510f 100755 --- a/uiCA.py +++ b/uiCA.py @@ -142,11 +142,12 @@ def getReadyCycle(self): RenameDictEntry = namedtuple('RenameDictEntry', ['renamedOp', 'renamedByElim32BitMove']) class Renamer: - def __init__(self, IDQ, reorderBuffer, uArchConfig: MicroArchConfig, initPolicy): + def __init__(self, IDQ, reorderBuffer, uArchConfig: MicroArchConfig, initPolicy, trackBlocking=False): self.IDQ = IDQ self.reorderBuffer = reorderBuffer self.uArchConfig = uArchConfig self.absValGen = AbstractValueGenerator(initPolicy) + self.trackBlocking = trackBlocking self.renameDict = {} @@ -189,24 +190,27 @@ def cycle(self): firstUnfusedUop.instrI.regMergeUops.append(LaminatedUop([mergeUop])) self.lastRegMergeIssued = firstUnfusedUop # Record that remaining IDQ uops blocked by register merge requirement - for lamUop in self.IDQ: - for uop in lamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'register_merge_required', {})) + if self.trackBlocking: + for lamUop in self.IDQ: + for uop in lamUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'register_merge_required', {})) break if firstUnfusedUop.prop.isFirstUopOfInstr and
firstUnfusedUop.prop.instr.isSerializingInstr and not self.reorderBuffer.isEmpty(): # Record that this serializing instruction is blocked - for uop in lamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'serializing_instruction_waiting', {})) + if self.trackBlocking: + for uop in lamUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'serializing_instruction_waiting', {})) break fusedUops = lamUop.getFusedUops() if len(renamerUops) + len(fusedUops) > self.uArchConfig.issueWidth: # Record that this lamUop and all remaining IDQ uops blocked by issue width - for uop in lamUop.getUnfusedUops(): - self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) - for remainingLamUop in list(self.IDQ)[1:]: # Skip first (already handled) - for uop in remainingLamUop.getUnfusedUops(): + if self.trackBlocking: + for uop in lamUop.getUnfusedUops(): self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) + for remainingLamUop in list(self.IDQ)[1:]: # Skip first (already handled) + for uop in remainingLamUop.getUnfusedUops(): + self.blockingInfo.append(BlockingEvent(self.renamerActiveCycle, uop, 'issue_width_exceeded', {})) break renamerUops.extend(fusedUops) self.IDQ.popleft() @@ -362,15 +366,16 @@ def getStoreBufferKey(self, memAddr): class FrontEnd: def __init__(self, instructions: List[Instr], reorderBuffer, scheduler, uArchConfig: MicroArchConfig, - unroll, alignmentOffset, initPolicy, perfEvents, simpleFrontEnd=False): + unroll, alignmentOffset, initPolicy, perfEvents, simpleFrontEnd=False, trackBlocking=False): self.IDQ = deque() - self.renamer = Renamer(self.IDQ, reorderBuffer, uArchConfig, initPolicy) + self.renamer = Renamer(self.IDQ, reorderBuffer, uArchConfig, initPolicy, trackBlocking) self.reorderBuffer = reorderBuffer self.scheduler = scheduler self.uArchConfig = uArchConfig self.unroll = unroll 
self.alignmentOffset = alignmentOffset self.perfEvents = perfEvents + self.trackBlocking = trackBlocking self.blockingInfo = [] # tracks why instructions can't be decoded each cycle self.MS = MicrocodeSequencer(self.uArchConfig) @@ -425,10 +430,11 @@ def cycle(self, clock): issueUops = self.renamer.cycle() else: # Record all uops in IDQ as blocked by RB or RS full - reason = 'reorder_buffer_full' if self.reorderBuffer.isFull() else 'reservation_station_full' - for lamUop in self.IDQ: - for uop in lamUop.getUnfusedUops(): - self.renamer.blockingInfo.append(BlockingEvent(clock, uop, reason, {})) + if self.trackBlocking: + reason = 'reorder_buffer_full' if self.reorderBuffer.isFull() else 'reservation_station_full' + for lamUop in self.IDQ: + for uop in lamUop.getUnfusedUops(): + self.renamer.blockingInfo.append(BlockingEvent(clock, uop, reason, {})) for fusedUop in issueUops: fusedUop.issued = clock @@ -446,8 +452,9 @@ def cycle(self, clock): if len(self.IDQ) + self.uArchConfig.DSBWidth > self.uArchConfig.IDQWidth: self.perfEvents.setdefault(clock, {})['IDQFull'] = 1 # Record all instructions in instruction queue as blocked from decode - for instrI in self.instructionQueue: - self.blockingInfo.append(InstructionBlockingEvent(clock, instrI, 'idq_full', {'idqSize': len(self.IDQ)})) + if self.trackBlocking: + for instrI in self.instructionQueue: + self.blockingInfo.append(InstructionBlockingEvent(clock, instrI, 'idq_full', {'idqSize': len(self.IDQ)})) return if self.uopSource is None: @@ -835,8 +842,9 @@ def addUops(self, clock, newUops): class Scheduler: - def __init__(self, uArchConfig: MicroArchConfig): + def __init__(self, uArchConfig: MicroArchConfig, trackBlocking=False): self.uArchConfig = uArchConfig + self.trackBlocking = trackBlocking self.uops = set() self.portUsage = {p:0 for p in allPorts[self.uArchConfig.name]} self.portUsageAtStartOfCycle = {} @@ -906,8 +914,9 @@ def dispatchUops(self, clock): queue = self.readyDivUops if self.blockedResources.get('port' 
+ port): # Record all uops in this port's queue as blocked by resource - for _, uop in queue: - self.blockingInfo.append(BlockingEvent(clock, uop, 'port_blocked_resource', {'port': port})) + if self.trackBlocking: + for _, uop in queue: + self.blockingInfo.append(BlockingEvent(clock, uop, 'port_blocked_resource', {'port': port})) continue if not queue: continue @@ -920,22 +929,24 @@ def dispatchUops(self, clock): self.pendingUops.add(uop) # Record that remaining uops in queue were passed over by an older uop - for _, remaining_uop in queue: - self.blockingInfo.append(BlockingEvent(clock, remaining_uop, 'port_busy_older_uop', { - 'port': port, - 'dispatchedInstead': uop - })) + if self.trackBlocking: + for _, remaining_uop in queue: + self.blockingInfo.append(BlockingEvent(clock, remaining_uop, 'port_busy_older_uop', { + 'port': port, + 'dispatchedInstead': uop + })) self.blockedResources['div'] += uop.prop.divCycles if self.uArchConfig.slow256BitMemAcc and (port == '4') and ('M256' in uop.instrI.instr.instrStr): self.blockedResources['port' + port] = 2 # Check for uops on ports that were removed from applicablePorts - allPortsList = list(allPorts[self.uArchConfig.name]) - for port in allPortsList: - if port not in applicablePorts: - for _, uop in self.readyQueue[port]: - self.blockingInfo.append(BlockingEvent(clock, uop, 'port_removed_by_constraint', {'port': port})) + if self.trackBlocking: + allPortsList = list(allPorts[self.uArchConfig.name]) + for port in allPortsList: + if port not in applicablePorts: + for _, uop in self.readyQueue[port]: + self.blockingInfo.append(BlockingEvent(clock, uop, 'port_removed_by_constraint', {'port': port})) for uop in self.uopsDispatchedInPrevCycle: self.portUsage[uop.actualPort] -= 1 @@ -1863,7 +1874,7 @@ def _groupAndDeduplicateBlockingEvents(events, maxCycle, getKey): return deduplicated -def generateJSONOutput(filename, instructions, frontEnd, uArchConfig, maxCycle, scheduler, renamer): +def generateJSONOutput(filename, 
instructions, frontEnd, uArchConfig, maxCycle, scheduler, renamer, trackBlocking): import json parameters = { 'uArchName': uArchConfig.name, @@ -1877,7 +1888,8 @@ def generateJSONOutput(filename, instructions, frontEnd, uArchConfig, maxCycle, 'DSBBlockSize': uArchConfig.DSBBlockSize, 'LSD': (frontEnd.uopSource == 'LSD'), 'LSDUnrollCount': frontEnd.LSDUnrollCount, - 'mode': 'unroll' if frontEnd.unroll else 'loop' + 'mode': 'unroll' if frontEnd.unroll else 'loop', + 'blockingTracked': trackBlocking } instrList = [] @@ -2005,7 +2017,7 @@ def getURL(instrStr): # Returns the throughput def runSimulation(disas, uArchConfig: MicroArchConfig, alignmentOffset, initPolicy, noMicroFusion, noMacroFusion, simpleFrontEnd, minIterations, minCycles, - printDetails=False, traceFile=None, graphFile=None, depGraphFile=None, jsonFile=None): + printDetails=False, traceFile=None, graphFile=None, depGraphFile=None, jsonFile=None, trackBlocking=False): instructions = getInstructions(disas, uArchConfig, importlib.import_module('instrData.'+uArchConfig.name+'_data'), alignmentOffset, noMicroFusion, noMacroFusion) if not instructions: @@ -2017,11 +2029,11 @@ def runSimulation(disas, uArchConfig: MicroArchConfig, alignmentOffset, initPoli retireQueue = deque() rb = ReorderBuffer(retireQueue, uArchConfig) - scheduler = Scheduler(uArchConfig) + scheduler = Scheduler(uArchConfig, trackBlocking) perfEvents: Dict[int, Dict[str, int]] = {} unroll = (not instructions[-1].isBranchInstr) - frontEnd = FrontEnd(instructions, rb, scheduler, uArchConfig, unroll, alignmentOffset, initPolicy, perfEvents, simpleFrontEnd) + frontEnd = FrontEnd(instructions, rb, scheduler, uArchConfig, unroll, alignmentOffset, initPolicy, perfEvents, simpleFrontEnd, trackBlocking) clock = 0 rnd = 0 @@ -2106,7 +2118,7 @@ def runSimulation(disas, uArchConfig: MicroArchConfig, alignmentOffset, initPoli generateGraphvizOutputForLatencyGraph(instructions, nodesForInstr, edgesForNode, edgesOnMaxCycle, comp, depGraphFile) if jsonFile 
is not None: - generateJSONOutput(jsonFile, instructions, frontEnd, uArchConfig, clock-1, scheduler, frontEnd.renamer) + generateJSONOutput(jsonFile, instructions, frontEnd, uArchConfig, clock-1, scheduler, frontEnd.renamer, trackBlocking) return TP @@ -2132,6 +2144,7 @@ def main(): parser.add_argument('-minCycles', help='Simulate at least this many cycles; default: 500', type=int, default=500) parser.add_argument('-json', help='JSON output', nargs='?', const='result.json') parser.add_argument('-depGraph', help='Output the dependency graph; the format is determined by the filename extension', nargs='?', const='dep.svg') + parser.add_argument('-trackBlocking', help='Track blocking events (may impact performance)', action='store_true') parser.add_argument('-initPolicy', help='Initial register state; ' 'options: "diff" (all registers initially have different values), ' '"same" (they all have the same value), ' @@ -2190,7 +2203,7 @@ def main(): print(' - {:.2f} otherwise\n'.format(sortedTP[-1][0], sortedTP[-1][1])) else: TP = runSimulation(disas, uArchConfig, int(args.alignmentOffset), args.initPolicy, args.noMicroFusion, args.noMacroFusion, args.simpleFrontEnd, - args.minIterations, args.minCycles, not args.TPonly, args.trace, args.graph, args.depGraph, args.json) + args.minIterations, args.minCycles, not args.TPonly, args.trace, args.graph, args.depGraph, args.json, args.trackBlocking) if args.TPonly: print('{:.2f}'.format(TP))