diff --git a/doc-DFT-howto.md b/doc-DFT-howto.md new file mode 100644 index 0000000000..5beda029b6 --- /dev/null +++ b/doc-DFT-howto.md @@ -0,0 +1,40 @@ +# DFT / Scan — Quickstart (Before vs After) + +This is a short “how to run” guide. For implementation details, limitations, and scan-order benchmarks, see `doc-DFT.md`. + +## Before (Baseline: no DFT) + +Run the flow normally: + +- `make -C flow DESIGN_CONFIG=./designs/nangate45/ibex/config.mk FLOW_VARIANT=baseline_no_dft finish` + +## After (DFT Enabled: scan flops + stitched chain) + +Enable the two DFT hook scripts: + +- `POST_FLOORPLAN_TCL=$(pwd)/flow/scripts/dft_scan_post_floorplan.tcl` + - runs `scan_replace` (functional flops → scan flops) + - creates scan ports: `scan_enable_0`, `scan_in_0`, `scan_out_0` + - applies `set_case_analysis 0 [get_ports scan_enable_0]` (functional-mode timing) +- `PRE_GLOBAL_ROUTE_TCL=$(pwd)/flow/scripts/dft_scan_pre_global_route.tcl` + - runs `execute_dft_plan` (stitches the scan chain using placement) + +Example: + +- `make -C flow DESIGN_CONFIG=./designs/nangate45/ibex/config.mk FLOW_VARIANT=with_dft POST_FLOORPLAN_TCL=$(pwd)/flow/scripts/dft_scan_post_floorplan.tcl PRE_GLOBAL_ROUTE_TCL=$(pwd)/flow/scripts/dft_scan_pre_global_route.tcl finish` + +## Sanity Checks + +- Report the plan (from OpenROAD, after `scan_replace`): + - `report_dft_plan -verbose` +- Validate chain integrity from a finished netlist: + - `python3 flow/util/scan_chain_validate.py --verilog flow/results/<platform>/<design>/<variant>/6_final.v` +- Or validate from an ODB (runs `scan_replace` + `execute_dft_plan` in-memory and writes a temp netlist): + - `python3 flow/util/scan_chain_validate.py --odb flow/results/<platform>/<design>/<variant>/3_5_place_dp.odb --openroad $OPENROAD_EXE --liberty <liberty.lib> --sdc flow/results/<platform>/<design>/<variant>/3_place.sdc --ensure-ports --scan-replace --execute-dft-plan` + +## Compare “Before vs After” QoR + +- Routed wirelength / timing: compare `flow/results/<...>/metrics.json` and the OpenROAD/OpenSTA reports between `baseline_no_dft` and `with_dft`.
+- Scan-chain wire metric on a fixed placement (also runs a nearest-neighbor (NN) heuristic for comparison): + - `python3 flow/util/scan_chain_cost.py --scan-replace --nearest-neighbor --openroad $OPENROAD_EXE --liberty <liberty.lib> --odb flow/results/<...>/3_5_place_dp.odb --sdc flow/results/<...>/3_place.sdc` + diff --git a/doc-DFT.md b/doc-DFT.md new file mode 100644 index 0000000000..f202bcd78d --- /dev/null +++ b/doc-DFT.md @@ -0,0 +1,92 @@ +# DFT / Scan in ORFS (OpenROAD-flow-scripts) + +Quickstart: `doc-DFT-howto.md` + +This repo wires OpenROAD’s DFT scan insertion into the ORFS flow via **opt-in** hook scripts, plus utilities to measure scan-chain wirelength and validate scan stitching. + +## Required OpenROAD + +This branch pins the `tools/OpenROAD` submodule to **OpenROAD-clean-DFT**: + +- Base: `7bc521f36a` +- +1 commit (DFT fixes): `661abebbc3c70c59b4a3991acd176a5cc785f0d4` + +The key point: it works with **vanilla OpenSTA** (no OpenSTA parser patch required). + +## ORFS Flow Integration (Where DFT Happens) + +Two hook scripts are provided: + +- `flow/scripts/dft_scan_post_floorplan.tcl` + - Intended use: `POST_FLOORPLAN_TCL=$(pwd)/flow/scripts/dft_scan_post_floorplan.tcl` + - Runs after floorplan, before saving `2_1_floorplan.odb`: + - `set_dft_config -max_chains 1 -clock_mixing clock_mix` + - `scan_replace` (functional flops → scan flops) + - creates scan ports: `scan_enable_0`, `scan_in_0`, `scan_out_0` + - `set_case_analysis 0 [get_ports scan_enable_0]` (functional-mode timing) + +- `flow/scripts/dft_scan_pre_global_route.tcl` + - Intended use: `PRE_GLOBAL_ROUTE_TCL=$(pwd)/flow/scripts/dft_scan_pre_global_route.tcl` + - Runs after CTS, before global routing: + - `set_dft_config ...` (must match the post-floorplan config) + - `set_case_analysis 0 [get_ports scan_enable_0]` + - `execute_dft_plan` (stitches the scan chain using placement) + +Notes: +- The scripts currently hardcode `-max_chains 1` to keep scan I/O stable for comparisons.
+- `set_case_analysis 0` ensures STA uses functional-mode arcs for scan flops. + +## OpenROAD-side Fixes (Summary) + +The OpenROAD-clean-DFT commit includes the minimum required fixes to make DFT “alive” on top of `7bc521f36a`: + +- Scan pin identification works in vanilla STA (`src/dbSta/src/dbSta.cc`): + - removes an overly-strict `extPort()` guard + - adds/uses fallback scan pin inference by common names (`SI/SE/SO`, etc.) +- DFT correctness fixes and functionality (DFT subsystem): + - scan stitching fixes (no dropped links) + - avoids reliance on `sta::TestCell` + - scan-out fallback behavior + - includes a small DFT regression test (`scan_architect_no_mix_nangate45`) + +## Scan-Ordering Benchmark (OpenROAD vs Nearest-Neighbor) + +`flow/util/scan_chain_cost.py` runs OpenROAD’s `report_dft_plan -verbose`, computes total Manhattan scan-chain length, and can also compute a simple nearest-neighbor (NN) heuristic for comparison. + +OpenROAD/NN results (lower is better; `openroad_over_nn < 1` means OpenROAD is shorter than NN): + +| platform | design | flops | openroad_um | nn_um | openroad_over_nn | +|---|---|---:|---:|---:|---:| +| nangate45 | aes | 562 | 3571.680 | 4178.080 | 0.855 | +| nangate45 | ibex | 1931 | 9197.880 | 10545.640 | 0.872 | +| nangate45 | jpeg | 4390 | 17903.670 | 20815.750 | 0.860 | +| asap7 | aes | 562 | 1053.810 | 1222.344 | 0.862 | +| asap7 | ibex | 273 | 428.652 | 514.404 | 0.833 | +| asap7 | jpeg | 4325 | 5045.058 | 5709.204 | 0.884 | +| sky130hd | aes | 562 | 11050.640 | 13137.940 | 0.841 | +| sky130hd | ibex | 1931 | 21754.680 | 24411.360 | 0.891 | +| sky130hd | jpeg | 4390 | 50973.380 | 57692.340 | 0.884 | + +Avg `openroad_over_nn` = `0.865` (~`13.5%` shorter than NN).
+ +Reproduce (single design): + +- `python3 flow/util/scan_chain_cost.py --scan-replace --nearest-neighbor --openroad tools/install/OpenROAD/bin/openroad --liberty flow/platforms/nangate45/lib/NangateOpenCellLibrary_typical.lib --odb flow/results/nangate45/ibex/cmp9_or0db856_rp100_20251229_022425/3_5_place_dp.odb --sdc flow/results/nangate45/ibex/cmp9_or0db856_rp100_20251229_022425/3_place.sdc` + +Notes: +- ASAP7 needs multiple libs; pass them all, e.g. `--liberty flow/platforms/asap7/lib/NLDM/*_TT_*`. + +## Scan-Chain Integrity Validation (Does It Actually Shift?) + +QoR deltas and plan reports are necessary but not sufficient; we also want a basic structural check that the scan path is one continuous chain from `scan_in_0` to `scan_out_0`. + +- `flow/util/scan_chain_validate.py` validates scan stitching from a gate-level netlist (or from an ODB by writing a temporary netlist via OpenROAD). +- It treats `assign` + inserted `BUF*/CLKBUF*` as transparent, so post-P&R buffering doesn’t cause false failures. + +Example usage: + +- Validate a finished netlist: + - `python3 flow/util/scan_chain_validate.py --verilog flow/results/nangate45/ibex/with_dft/6_final.v` +- Validate from an ODB (writes a temp netlist first): + - `python3 flow/util/scan_chain_validate.py --odb flow/results/nangate45/ibex/with_dft/6_final.odb --openroad tools/install/OpenROAD/bin/openroad --liberty flow/platforms/nangate45/lib/NangateOpenCellLibrary_typical.lib --sdc flow/results/nangate45/ibex/with_dft/6_final.sdc --ensure-ports` + diff --git a/doc.md b/doc.md new file mode 100644 index 0000000000..d5d3ff9314 --- /dev/null +++ b/doc.md @@ -0,0 +1,635 @@ +# ORFS engineering work log (team) + +This is a shared, team-facing work log / index for ongoing workstreams in this +repo. Keep it **high-signal** and link out to dedicated docs for deep dives. 
+ +## DFT / Scan insertion (OpenROAD DFT) + +Docs: +- `doc-DFT-howto.md`: quickstart (how to run ORFS with scan insertion) +- `doc-DFT.md`: design/implementation notes (knobs, algorithm, QoR deltas, validation tools) + +Branches on PrecisEDAnon GitHub: +- OpenROAD: + - [`OpenROAD-clean-DFT`](https://github.com/PrecisEDAnon/OpenROAD/tree/OpenROAD-clean-DFT) (baseline) + - [`OpenROAD-toggle-rebased-DFT`](https://github.com/PrecisEDAnon/OpenROAD/tree/OpenROAD-toggle-rebased-DFT) (active) +- OpenROAD-flow-scripts: + - [`ORFS-clean-DFT`](https://github.com/PrecisEDAnon/OpenROAD-flow-scripts/tree/ORFS-clean-DFT) (baseline) + - [`ORFS-toggle-rebased-DFT`](https://github.com/PrecisEDAnon/OpenROAD-flow-scripts/tree/ORFS-toggle-rebased-DFT) (active) + +Note: +- `ORFS-clean-DFT` is meant as a baseline snapshot; the knob list below reflects the active `ORFS-toggle-rebased-DFT` branch. + +How to run (ORFS): +- `POST_FLOORPLAN_TCL=$(pwd)/flow/scripts/dft_scan_post_floorplan.tcl` (runs `scan_replace`, creates scan ports) +- `PRE_GLOBAL_ROUTE_TCL=$(pwd)/flow/scripts/dft_scan_pre_global_route.tcl` (optional scan port placement + runs `execute_dft_plan`) + +Key knobs (ORFS-toggle-rebased-DFT): +- `DFT_CHAIN_COUNT`: fixed number of scan chains (exact) +- `DFT_MAX_CHAIN_LENGTH`/`DFT_MAX_LENGTH`: max bits per chain (also used to infer chain count when `DFT_CHAIN_COUNT` is not set) +- `DFT_PLACE_SCAN_PORTS`: re-place `scan_in_N`/`scan_out_N` near chain endpoints; defaults on when multi-chain is configured; override with `DFT_PLACE_SCAN_PORTS=0` +- `DFT_DONT_TOUCH_SCAN_NETS`: marks SCAN nets `dont_touch` post-stitching to reduce QoR-driven resizer churn on scan-only nets + +Algorithm sketch: +- Clustering/partitioning across chains: placement-aware reassignment (“swap/move”) under a per-chain max-length cap. +- Intra-chain ordering: “TSP path” heuristic (NN + farthest insertion + bounded 2‑opt). 
+ +QoR snapshot (example: `nangate45/ibex`): +- DFT vs no-DFT typically costs ~`+8%` detailed-route WL, ~`+9%` instance area (seq area ~`+26%`), ~`+2%` total power. +- Functional timing is reported with scan disabled (`set_case_analysis 0 scan_enable_0`), so WS deltas are small/run-dependent. + +--- + +## Recover power work log (historical) + +This section is a running log of what we changed and what we measured while +trying to get a consistent power reduction across: + +- Platforms/PDKs: `asap7`, `sky130hd`, `nangate45` +- Designs: `aes`, `ibex`, `jpeg` +- Metric of record: `finish__power__total` from `flow/logs/<platform>/<design>/<variant>/6_report.json` + +This file started as a local/untracked note; it is now kept in-repo for team +handoffs. Avoid putting secrets/credentials in it. + + +## Handoff / quickstart (for another team) + +Goal: + +- Reproduce the **baseline vs ours** comparison using identical ORFS + OpenSTA and + identical design configs, with `RECOVER_POWER=100` in both cases. +- Baseline OpenROAD: `7bc521f36a` (v2.0-26260) +- Ours OpenROAD (recover_power rework + runtime fixes): `0db856789c` (v2.0-26264) +- OpenSTA: baseline `d7cb9be1` in both cases + +What someone needs to change vs vanilla ORFS `93c42b2e68`: + +- **Only** set `RECOVER_POWER=100` (either in the design `config.mk` or on the + `make` command line). No additional Tcl changes are required for recover-power + enablement on that ORFS snapshot. + +Where the handoff “packet” lives in this tree: + +- `tech-memo.md`: technical memo (EDA + code) describing baseline vs ours + `recover_power` behavior, correctness constraints, and measured deltas under + `RECOVER_POWER=100`. +- `table.md`: side-by-side comparison for the two installed-binary runs + (includes internal/switching/leakage breakdown, wirelength, ECP, area/count).
+- Full flow artifacts for the installed-binary A/B runs: + - `flow/backup_install_6f9703_rp100_20251228_021115_20251228_055055/` + - `flow/backup_install_7bc521_rp100_20251228_055901_20251228_063557/` + - Each backup includes `meta/manifest.txt` recording ORFS head, OpenROAD hash, + OpenSTA hash, installed exe path, and knobs used. + +Minimal reproduce steps (A/B installed binary): + +1) Ensure submodules are present (note: submodule URLs in `.gitmodules` are + **relative** in this environment, e.g. `tools/OpenROAD` points at `../OpenROAD.git`; + update/sync them if cloning elsewhere). + +2) Build + install OpenROAD **(ours)**: + +- `git -C tools/OpenROAD checkout 0db856789c` +- `git -C tools/OpenROAD submodule update --init --recursive` +- Optional sanity: `git -C tools/OpenROAD/src/sta rev-parse --short=10 HEAD` should be `d7cb9be1ca` +- `make -C tools/OpenROAD/build -j"$(nproc)" install` +- Verify banner: `tools/install/OpenROAD/bin/openroad` prints + `OpenROAD v2.0-*-g0db856789c` at startup. 
+ +3) Run the 9 shipped designs (3 platforms × 3 designs), in parallel: + +- Example: + - `OPENROAD_EXE=$PWD/tools/install/OpenROAD/bin/openroad RECOVER_POWER=100 CONFIG_COMMIT=93c42b2e68 EQUIVALENCE_CHECK=0 ./run_9_shipped_configs_parallel.sh install_ours_rp100_$(date +%Y%m%d_%H%M%S)` +- Outputs land in: + - `flow/logs/<platform>/<design>/<variant>/...` + - Wrapper logs: `flow/logs/run_<platform>_<design>_<variant>.log` + +If the `config_<commit>.mk` snapshots are missing on a fresh clone: + +- Either run with a commit suffix you *do* have locally, or create the `93c42b2e68` + snapshots from git history, e.g.: + - `for p in asap7 sky130hd nangate45; do for d in aes ibex jpeg; do git show 93c42b2e68:flow/designs/$p/$d/config.mk > flow/designs/$p/$d/config_93c42b2e68.mk; done; done` + +4) Build + install OpenROAD **(baseline)** and re-run with a new variant: + +- `git -C tools/OpenROAD checkout 7bc521f36a` (or `orfs-baseline-7bc521`) +- `git -C tools/OpenROAD submodule update --init --recursive` +- Optional sanity: `git -C tools/OpenROAD/src/sta rev-parse --short=10 HEAD` should be `d7cb9be1ca` +- `make -C tools/OpenROAD/build -j"$(nproc)" install` +- Re-run (set the same environment variables as in step 3): `./run_9_shipped_configs_parallel.sh install_7bc521_rp100_$(date +%Y%m%d_%H%M%S)` + +Notes on installs / submodule state: + +- `make install` overwrites `tools/install/OpenROAD/bin/openroad` (and `sta`). For + an A/B, either (a) run the flow after each install as above, or (b) copy the + binaries aside and use `OPENROAD_EXE=...` to point to the intended one. +- ORFS may pin `tools/OpenROAD` to an older gitlink; checking out other commits + inside the submodule for baseline builds will make `git status` show the + submodule as “modified”. Treat the OpenROAD banner in run logs as the ground + truth.
+ +5) Verify the correct binary + `RECOVER_POWER=100` were used: + +- OpenROAD hash is in the stage logs (banner line): + - `rg -F "OpenROAD v2.0" flow/logs/*/*/<variant>/*.log` +- Recover-power invocation is visible in the GRT stage: + - `rg -F "repair_timing -verbose -recover_power 100" flow/logs/*/*/<variant>/5_1_grt.log` + +6) Compare: + +- Power breakdown keys are in `flow/logs/<platform>/<design>/<variant>/6_report.json`: + - `finish__power__internal__total` + - `finish__power__switching__total` + - `finish__power__leakage__total` + - `finish__power__total` +- Wirelength is in `flow/logs/<platform>/<design>/<variant>/5_2_route.json`: + - `detailedroute__route__wirelength` (ignore the per-iter keys) +- ECP (effective clock period, as used here): `period - finish__timing__setup__ws` + - Period is from `flow/results/<platform>/<design>/<variant>/6_1_fill.sdc` (`create_clock -period ...`) +- Instance count/area: + - `6_report.json` has duplicate keys for `finish__design__instance__count` and + `finish__design__instance__area` (two different rollups); use the unambiguous + `finish__design__instance__count__stdcell` / `finish__design__instance__area__stdcell`. + +## 0.
Repo / environment snapshot + +- ORFS repo HEAD: `f023cc896` +- “Shipped configs” snapshot commit used for design configs: `93c42b2e68` + - Local copies exist as `flow/designs/<platform>/<design>/config_93c42b2e68.mk` +- Baseline OpenROAD for current comparisons: `7bc521f36a` (v2.0-26260) + - Preserved as branch: `tools/OpenROAD` → `orfs-baseline-7bc521` +- Current OpenROAD under test (recover_power rework + runtime fixes): `0db856789c` (v2.0-26264) +- Historical reference point (recover_power rework without runtime fixes): `6f9703be52` (v2.0-26263) + - Submodule: `tools/OpenROAD/` (pinned in ORFS at `6f9703be52` at time of initial measurements) + - OpenSTA submodule: `d7cb9be1` (baseline OpenSTA; no activity-model tweak) +- Optional combined OpenROAD+OpenSTA variant (not shipped): `89d7104824` + - Preserved as branch: `tools/OpenROAD` → `orfs-powerfix-sta` + - Preserved OpenSTA patch branch: `tools/OpenROAD/src/sta` → `orfs-power-density-wns` + +Local safety backup: + +- Prebuilt binaries from the removed worktrees were copied to: + - `tools/openroad_exe_backups_20251228_002226/` + +Previous baseline used earlier in this log (superseded by `7bc521`): + +- `98be0fa0be` (see §6.2) + +Host constraints: + +- `eqy` is not generally available on this machine → equivalence checking must + not hard-fail runs. + +--- + +## 1. What “RECOVER_POWER=100” actually means + +In ORFS, `RECOVER_POWER` flows through to OpenROAD as: + +- `repair_timing -recover_power <RECOVER_POWER>` + +In OpenROAD, this is an **effort / coverage knob** for power recovery: + +- It does **not** mean “100% power saving”. +- It means “consider (up to) ~100% of eligible candidates per pass” (and/or run + the most aggressive recovery settings). + +So the right interpretation is: + +- `RECOVER_POWER=0` → no power-recovery actions +- `RECOVER_POWER=100` → “max effort” power recovery + +--- + +## 2.
Flow robustness: disabling EQY automatically + +Problem: + +- ORFS can be configured with `EQUIVALENCE_CHECK=1`, but many hosts won’t have + `eqy` installed; runs would fail for reasons unrelated to PPA. + +Fix: + +- `flow/scripts/load.tcl` now disables `EQUIVALENCE_CHECK` automatically when + `eqy` is not in `PATH`, and also skips if `eqy` fails to execute. + +Patch summary: + +- `flow/scripts/load.tcl`: + - Added `maybe_disable_equivalence_check` + - `run_equivalence_test` now checks `auto_execok eqy`, and wraps `exec eqy` + in `catch` to avoid hard-failing the run. + +--- + +## 3. OpenROAD code changes (tools/OpenROAD) + +All changes below refer to the OpenROAD-only powerfix commit: + +- `tools/OpenROAD` @ `6f9703be52` + +### 3.1 Build / CMake quality-of-life + +- `tools/OpenROAD/CMakeLists.txt` + - Generate `Version.hh` into the build tree (`${CMAKE_CURRENT_BINARY_DIR}/include/ord/Version.hh`) + rather than modifying the source tree. +- `tools/OpenROAD/src/CMakeLists.txt` + - Add `${CMAKE_BINARY_DIR}/include` to include dirs so the generated + `Version.hh` is found. +- `tools/OpenROAD/src/drt/CMakeLists.txt` + - Make VTune optional: `find_package(VTune QUIET)` so missing VTune doesn’t + fail configuration. 
+ +### 3.2 Recover power rework (rsz) + +Files: + +- `tools/OpenROAD/src/rsz/src/RecoverPower.hh` +- `tools/OpenROAD/src/rsz/src/RecoverPower.cc` +- `tools/OpenROAD/src/rsz/include/rsz/Resizer.hh` +- `tools/OpenROAD/src/rsz/src/Resizer.cc` +- `tools/OpenROAD/src/rsz/README.md` (documents semantics and behavior) + +High-level behavior implemented: + +- Multi-pass recovery loop: repeatedly + - recompute WNS + - collect eligible candidates + - sort by a “power × slack headroom” score + - attempt safe swaps until no more progress +- Candidate selection: + - Prefer high-power instances with usable slack headroom + - Treat clock-network drivers specially (some have no meaningful data slack) + - Optionally allow safe non-clock buffer removal + - Filter out unconstrained/extreme slack artifacts +- Per-instance optimization: + 1) Try safe removal of redundant non-clock buffers + 2) Try “next smaller footprint” downsizes (area reduction → dynamic power) + 3) Try VT swaps toward lower leakage (if VT-equivalent cells exist) +- Safety guards: + - Preserve (or bound) setup/hold WNS via floors + - Do not increase max slew/cap/fanout violation counts + - Roll back rejected changes via the ECO journal + +Notable semantic choice: + +- When the design is already setup-failing (`WNS < 0`), recovery is allowed to + trade a small additional WNS budget for power (documented as a fraction of + min clock period, scaled by effort). This is visible in the results (some + designs cross from setup-closed to setup-failing when power recovery is ON). + +### 3.3 (Optional / not shipped) STA power default activity tweak (src/sta) + +File: + +- `tools/OpenROAD/src/sta/power/Power.cc` (OpenSTA commit `54f0fdbd`) + +Change: + +- Adjust default input activity “density” when setup timing is failing: + - if `WNS < 0`, treat the design as effectively running slower than the + requested period (`effective_period = period - WNS`), then set density + from that. 
+ +Note (why we did not ship this): + +- This can affect `report_power` whenever default activity is used, and therefore + can confound `finish__power__total` comparisons when WNS changes sign/magnitude. +- We measured that the bulk of the reduction vs the baseline is already achieved + by OpenROAD `recover_power` netlist changes alone (with baseline OpenSTA), so + this tweak is not required. + +--- + +## 4. ORFS UX tweaks related to recover power + +- `flow/scripts/util.tcl` + - More explicit log banner text for the recover-power phase. +- `flow/scripts/variables.yaml` + - Clarified `RECOVER_POWER` description (effort semantics). +- `collect_metrics.py` + - Removed a debug print and fixed file newline. + +--- + +## 5. How runs were executed (repro) + +### 5.1 Inputs / configs + +- Used “shipped” config snapshots: + - `DESIGN_CONFIG=designs/<platform>/<design>/config_93c42b2e68.mk` + +### 5.2 Targets + +- We ran to `6_report` to populate `6_report.json`: + - `logs/<platform>/<design>/<variant>/6_report.log` + +### 5.3 Key knobs + +- `RECOVER_POWER={0,100}` +- `EQUIVALENCE_CHECK=0` (forced off; also now auto-disabled if `eqy` missing) +- `OPENROAD_EXE=...` used to select the OpenROAD binary per comparison variant + +### 5.4 Wrapper logs + +Each run also has a wrapper log: + +- `flow/logs/run_<platform>_<design>_<variant>.log` + +--- + +## 6.
Experiments and results + +All power numbers below are from: + +- `finish__power__total` in `flow/logs////6_report.json` + +### 6.1 Our patched OpenROAD (default ORFS OpenROAD) — `RECOVER_POWER: 0 → 100` + +Variants: + +- Base: `eval_93c42b2e68_20251226_001739_base` (`RECOVER_POWER=0`) +- Pwr: `eval_93c42b2e68_20251226_001739_pwr` (`RECOVER_POWER=100`) + +Result summary: + +- Average power delta across 9: **-7.44%** +- Setup closure regressions (setup-closed → setup-failing in `finish__timing__setup__ws`): + - `asap7/jpeg` + - `sky130hd/aes` + - `sky130hd/jpeg` + +Per-design: + +| platform | design | base power (W) | pwr power (W) | Δ% | base setup WNS | pwr setup WNS | +|---|---:|---:|---:|---:|---:|---:| +| asap7 | aes | 0.153795 | 0.150799 | -1.948% | -19.143 | -51.448 | +| asap7 | ibex | 0.058131 | 0.046635 | -19.775% | -127.271 | -149.076 | +| asap7 | jpeg | 0.119744 | 0.117463 | -1.905% | 18.233 | -1.006 | +| sky130hd | aes | 0.457787 | 0.437447 | -4.443% | 0.121 | -0.126 | +| sky130hd | ibex | 0.093246 | 0.074849 | -19.730% | -0.466 | -0.899 | +| sky130hd | jpeg | 0.486010 | 0.476472 | -1.963% | 0.007 | -0.263 | +| nangate45 | aes | 0.385728 | 0.373988 | -3.044% | -0.031 | -0.056 | +| nangate45 | ibex | 0.096048 | 0.090754 | -5.511% | -0.021 | -0.093 | +| nangate45 | jpeg | 0.498596 | 0.455721 | -8.599% | -0.116 | -0.145 | + +Platform averages: + +- asap7: **-7.88%** +- sky130hd: **-8.71%** +- nangate45: **-5.72%** + +### 6.2 Baseline OpenROAD — `RECOVER_POWER: 0 → 100` + +Baseline OpenROAD snapshot used earlier in this log (superseded by `7bc521`): + +- OpenROAD commit: `98be0fa0be` (“power test”) +- Preserved as branch: `tools/OpenROAD` → `orfs-baseline-98be0fa-buildfix` (commit `b899aafed5`) +- Prebuilt binary backup: `tools/openroad_exe_backups_20251228_002226/openroad_98be0fa0be` + +Minimal compile-fix patch (now committed as `b899aafed5`, no intended behavior change): + +- `tools/OpenROAD_baseline/src/rsz/src/RecoverPower.hh` + - Removed unused `using 
sta::PathExpanded;` +- `tools/OpenROAD_baseline/src/rsz/src/RecoverPower.cc` + - Fixed `sta_->corners()` API usage (`sta::Corners*`) + +Variants: + +- Base: `orbase_98be0fa0be_93c42b2e68_20251226_051506_base` (`RECOVER_POWER=0`) +- Pwr: `orbase_98be0fa0be_93c42b2e68_20251226_051506_pwr` (`RECOVER_POWER=100`) + +Note: + +- The initial 9-way parallel `*_pwr` run timed out in the harness because + `asap7/jpeg` took much longer; it was re-run serially to completion. + +Result summary: + +- Average power delta across 9: **-2.34%** + +Per-design: + +| platform | design | base power (W) | pwr power (W) | Δ% | base setup WNS | pwr setup WNS | +|---|---:|---:|---:|---:|---:|---:| +| asap7 | aes | 0.153795 | 0.152023 | -1.152% | -19.143 | -27.104 | +| asap7 | ibex | 0.058131 | 0.057927 | -0.349% | -127.271 | -129.394 | +| asap7 | jpeg | 0.119744 | 0.119257 | -0.407% | 18.233 | 17.015 | +| sky130hd | aes | 0.457787 | 0.411592 | -10.091% | 0.121 | 0.040 | +| sky130hd | ibex | 0.093246 | 0.093097 | -0.160% | -0.466 | -0.424 | +| sky130hd | jpeg | 0.486010 | 0.463911 | -4.547% | 0.007 | 0.016 | +| nangate45 | aes | 0.385728 | 0.372757 | -3.363% | -0.031 | -0.030 | +| nangate45 | ibex | 0.096048 | 0.095333 | -0.744% | -0.021 | -0.046 | +| nangate45 | jpeg | 0.498596 | 0.497271 | -0.266% | -0.116 | -0.112 | + +Platform averages: + +- asap7: **-0.64%** +- sky130hd: **-4.93%** +- nangate45: **-1.46%** + +### 6.3 Baseline `7bc521` vs combined `89d710` — `RECOVER_POWER=100` in both + +Variants: + +- Baseline: `or7bc521_rp100_20251227_065427` + - run-time OpenROAD banner: `OpenROAD v2.0-26260-g7bc521f36a` + - rerun with: `OPENROAD_EXE=tools/openroad_exe_backups_20251228_002226/openroad_7bc521f36a` +- Combined: `or89d710_rp100_20251227_065427` (includes OpenSTA activity tweak) + - run-time OpenROAD banner: `OpenROAD v2.0-26264-g89d7104824` + - rerun with: `OPENROAD_EXE=tools/openroad_exe_backups_20251228_002226/openroad_89d7104824` + +Result summary: + +- Average power delta across 9 
(ours vs `7bc521`): **-7.39%** +- Setup closure regressions (setup-closed → setup-failing in `finish__timing__setup__ws`): + - `asap7/jpeg` + - `sky130hd/aes` + - `sky130hd/jpeg` + +Per-design: + +| platform | design | 7bc521 power (W) | ours power (W) | Δ% | 7bc521 setup WNS | ours setup WNS | +|---|---:|---:|---:|---:|---:|---:| +| asap7 | aes | 0.153740 | 0.150799 | -1.913% | -23.502 | -51.448 | +| asap7 | ibex | 0.058104 | 0.046635 | -19.738% | -124.221 | -149.076 | +| asap7 | jpeg | 0.119701 | 0.117463 | -1.870% | 17.462 | -1.006 | +| sky130hd | aes | 0.456944 | 0.437447 | -4.267% | 0.020 | -0.126 | +| sky130hd | ibex | 0.093201 | 0.074849 | -19.690% | -0.424 | -0.899 | +| sky130hd | jpeg | 0.485904 | 0.476472 | -1.941% | 0.010 | -0.263 | +| nangate45 | aes | 0.385294 | 0.373988 | -2.934% | -0.032 | -0.056 | +| nangate45 | ibex | 0.095977 | 0.090754 | -5.442% | -0.033 | -0.093 | +| nangate45 | jpeg | 0.499218 | 0.455721 | -8.713% | -0.115 | -0.145 | + +### 6.4 “Pure OpenROAD” (no OpenSTA patch) vs baseline `7bc521` — `RECOVER_POWER=100` + +This isolates the OpenROAD `recover_power` rework from any OpenSTA changes: + +- OpenROAD commit: `6f9703be52` (recover_power rework) +- OpenSTA submodule: baseline `d7cb9be1` (no default-activity tweak) + +Variants: + +- Baseline: `or7bc521_rp100_20251227_065427` + - rerun with: `OPENROAD_EXE=tools/openroad_exe_backups_20251228_002226/openroad_7bc521f36a` +- OpenROAD-only: `or6f9703_nosta_rp100_20251227_191654` + - rerun with: `OPENROAD_EXE=tools/openroad_exe_backups_20251228_002226/openroad_6f9703be52` +- OpenROAD-only (installed) rerun: `install_6f9703_rp100_20251228_021115` + - run-time OpenROAD banner: `OpenROAD v2.0-26263-g6f9703be52` + - built/installed from `tools/OpenROAD` @ `orfs-powerfix-nosta` + - rerun with: `OPENROAD_EXE=tools/install/OpenROAD/bin/openroad` +- Baseline (installed) rerun: `install_7bc521_rp100_20251228_055901` + - run-time OpenROAD banner: `OpenROAD v2.0-26260-g7bc521f36a` + - built/installed 
from `tools/OpenROAD` @ `orfs-baseline-7bc521` + +Result summary: + +- Average power delta across 9 (OpenROAD-only vs `7bc521`): **-7.35%** + - This is the mean of per-design % deltas (each design equal weight); the + sum-of-powers delta across all 9 is **-5.33%** (see `table.md`). +- Average difference vs combined (`89d710`): **+0.07%** (OpenROAD-only slightly higher; negligible) +- Setup closure regressions vs baseline: same 3/9 + - `asap7/jpeg` + - `sky130hd/aes` + - `sky130hd/jpeg` + +Per-design: + +| platform | design | 7bc521 power (W) | nosta power (W) | Δ% | 7bc521 setup WNS | nosta setup WNS | +|---|---:|---:|---:|---:|---:|---:| +| asap7 | aes | 0.153740 | 0.150766 | -1.934% | -23.502 | -52.791 | +| asap7 | ibex | 0.058104 | 0.047250 | -18.679% | -124.221 | -146.473 | +| asap7 | jpeg | 0.119701 | 0.117466 | -1.867% | 17.462 | -0.310 | +| sky130hd | aes | 0.456944 | 0.437447 | -4.267% | 0.020 | -0.126 | +| sky130hd | ibex | 0.093201 | 0.075202 | -19.311% | -0.424 | -0.940 | +| sky130hd | jpeg | 0.485904 | 0.476595 | -1.916% | 0.010 | -0.278 | +| nangate45 | aes | 0.385294 | 0.373953 | -2.943% | -0.032 | -0.053 | +| nangate45 | ibex | 0.095977 | 0.089953 | -6.277% | -0.033 | -0.120 | +| nangate45 | jpeg | 0.499218 | 0.454395 | -8.979% | -0.115 | -0.143 | + +Platform averages (OpenROAD-only vs `7bc521`): + +- asap7: **-7.49%** +- sky130hd: **-8.50%** +- nangate45: **-6.07%** + +### 6.5 Committed runtime-fixed `recover_power` vs baseline `7bc521` — `RECOVER_POWER=100` + +This is the “current” A/B comparison for the committed runtime-fixed +implementation (OpenROAD `0db856789c`) versus baseline (OpenROAD `7bc521f36a`). +It re-runs the full 9-design suite from scratch after backing up and cleaning +the flow artifacts. 
+ +Runs / variants: + +- Baseline: `cmp9_or7bc521_rp100_20251228_190831` + - stored under: `flow/backup_preclean_0db856_20251229_022357/logs/...` +- Ours: `cmp9_or0db856_rp100_20251229_022425` + - stored under: `flow/logs/...` + +Result summary: + +- Average power delta across 9 (mean of per-design % deltas): **-8.540%** +- Sum-of-powers delta across the 9 designs (aggregate watts): **-7.417%** +- Setup closure regressions vs baseline in `finish__timing__setup__ws` (sign flips): 3/9 + - `asap7/jpeg` + - `sky130hd/aes` + - `sky130hd/jpeg` +- DRV regression warnings from `recover_power` (`RSZ-0145`): 2/9 + - `sky130hd/jpeg` (slew violations) + - `nangate45/ibex` (max-cap violations) + +Intermediate run identity check: + +- The previously “intermediate/uncertain” run `cmp9_or6f97fast_rp100_20251228_193507` + matches `cmp9_or0db856_rp100_20251229_022425` exactly for `finish__power__total` + across all 9 designs (0.000% deltas), so treat that intermediate table as the + committed runtime-fixed implementation. 
+ +Per-design: + +| platform | design | 7bc521 power (W) | ours power (W) | Δ% | 7bc521 setup WNS | ours setup WNS | +|---|---:|---:|---:|---:|---:|---:| +| asap7 | aes | 0.153740 | 0.150746 | -1.947% | -23.502 | -63.189 | +| asap7 | ibex | 0.058104 | 0.047237 | -18.703% | -124.221 | -153.513 | +| asap7 | jpeg | 0.119701 | 0.117483 | -1.853% | 17.462 | -1.618 | +| sky130hd | aes | 0.456944 | 0.437548 | -4.245% | 0.020 | -0.039 | +| sky130hd | ibex | 0.093201 | 0.075136 | -19.383% | -0.424 | -0.804 | +| sky130hd | jpeg | 0.485904 | 0.428012 | -11.914% | 0.010 | -0.298 | +| nangate45 | aes | 0.385294 | 0.373668 | -3.017% | -0.032 | -0.055 | +| nangate45 | ibex | 0.095977 | 0.089371 | -6.883% | -0.033 | -0.132 | +| nangate45 | jpeg | 0.499218 | 0.454714 | -8.915% | -0.115 | -0.140 | + +Recover-power runtime (from `5_1_grt.log`: “Took … seconds: repair_timing -verbose -recover_power 100”): + +| platform | design | 7bc521 time (s) | ours time (s) | +|---|---|---:|---:| +| asap7 | aes | 7 | 189 | +| asap7 | ibex | 26 | 421 | +| asap7 | jpeg | 41 | 519 | +| sky130hd | aes | 10 | 147 | +| sky130hd | ibex | 20 | 419 | +| sky130hd | jpeg | 26 | 547 | +| nangate45 | aes | N/A | 132 | +| nangate45 | ibex | 20 | 450 | +| nangate45 | jpeg | 36 | 556 | + +Notes: + +- The sky130hd/jpeg power delta is substantially larger than the historical + OpenROAD-only run (`install_6f9703_rp100_20251228_021115`) and coincides with + an `RSZ-0145` warning (DRV count increase). Treat this case as “power win with + DRV caveat” until DRV preservation is tightened. + +Backups of the full flow artifacts (logs/reports/results/objects) for the two +installed-binary A/B runs: + +- Powerfix install run: `flow/backup_install_6f9703_rp100_20251228_021115_20251228_055055/` +- Baseline install run: `flow/backup_install_7bc521_rp100_20251228_055901_20251228_063557/` + +--- + +## 7. 
Conclusions so far (as of this log) + +1) **Recover power is a real baseline feature**, but baseline OpenROAD + `RECOVER_POWER=100` does **not** yield ~5% consistently across this 9-design set: + it averages **~2.3%**. + +2) The current patched OpenROAD implementation produces **~8.5% average** + reduction on this 9-design set (see §6.5: **-8.540%** mean of per-design + deltas; **-7.417%** aggregate watts vs baseline), but it can reduce final + setup margin at `RECOVER_POWER=100`: + - 3/9 designs show `finish__timing__setup__ws` sign flips vs baseline. + - 2/9 designs emit `RSZ-0145` warnings (DRV count increases). + +3) Because ORFS power is sourced from OpenSTA `report_power` (and default + activity assumptions), any changes that alter default activity (or tie + activity to timing) can materially affect `finish__power__total`. This is + especially important for runs where WNS changes sign or magnitude. + +4) The measured power delta is dominated by netlist changes in `recover_power` + (downsizing, non-clock buffer removal, VT swaps), not by a reporting-model + tweak to default activity. Historical “OpenSTA activity” variants were not + required to reproduce the bulk of the delta on this set. + +5) Baseline `recover_power` is conservative and timing-driven (path-based driver + downsizing). The rework is power-driven and global (instance power × slack + headroom ranking) with a broader move set and multi-pass iteration. + +6) On this 9-design set, the total-power reduction is dominated by **internal + power** (≈74% of total Δ in aggregate watts, with most of the remainder in + switching; leakage is small in absolute watts). This correlates with fewer + buffering instances and lower stdcell area/count (see `table.md` for the + installed-binary A/B breakdown). + +--- + +## 8. Pointers to other local notes + +- `state-dec-24.md` contains additional context about repo/config state and + the earlier “config.mk mismatch” issue. 
# Create the top-level scan port `port_name` with direction `io_type`
# (INPUT/OUTPUT) unless a block terminal of that name already exists.
# A net of the same name is reused when present; otherwise a new net is
# created and tagged with signal type SCAN.
proc dft_ensure_scan_port {port_name io_type} {
  set db_block [ord::get_db_block]

  # Idempotent: an existing port is left untouched.
  if { [$db_block findBTerm $port_name] != "NULL" } {
    return
  }

  set port_net [$db_block findNet $port_name]
  if { $port_net == "NULL" } {
    set port_net [odb::dbNet_create $db_block $port_name]
    $port_net setSigType SCAN
  }

  set port [odb::dbBTerm_create $port_net $port_name]
  $port setSigType SCAN
  $port setIoType $io_type
}
#!/usr/bin/env python3
"""Measure scan-chain Manhattan wirelength for an OpenROAD DFT plan.

Runs OpenROAD to obtain `report_dft_plan -verbose` and a DEF of the design,
then sums instance-origin Manhattan distances along each reported chain.
"""

import argparse
import json
import os
import re
import subprocess
import tempfile
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Sequence, Tuple

_CHAIN_HEADER_RE = re.compile(r"^Scan chain '([^']+)' has (\d+) cells")
_DEF_UNITS_RE = re.compile(r"^UNITS\s+DISTANCE\s+MICRONS\s+(\d+)\s*;")
_DEF_PLACE_RE = re.compile(
    r"\+\s+(?:PLACED|FIXED)\s*\(\s*(-?\d+)\s+(-?\d+)\s*\)", re.IGNORECASE
)


@dataclass(frozen=True)
class ChainMetrics:
    """Wirelength metrics of one scan chain; ``None`` means "not computable"."""

    name: str
    cells: int
    manhattan_dbu: int
    manhattan_um: Optional[float]
    avg_step_um: Optional[float]
    naive_lex_manhattan_um: Optional[float]
    naive_lex_ratio: Optional[float]
    nearest_neighbor_manhattan_um: Optional[float]
    openroad_over_nn_ratio: Optional[float]


def _tcl_quote(path: Path) -> str:
    """Brace-quote *path* so Tcl treats it as one literal word."""
    return "{" + str(path) + "}"


def run_openroad_plan(
    *,
    openroad_exe: Path,
    liberties: Sequence[Path],
    odb: Path,
    sdc: Path,
    out_def: Path,
    max_chains: int,
    clock_mixing: str,
    do_scan_replace: bool,
    verbose: bool,
) -> str:
    """Run OpenROAD to report the DFT plan and write a DEF to *out_def*.

    Returns the combined stdout/stderr of the OpenROAD run.

    Raises:
        RuntimeError: if OpenROAD exits with a non-zero status.
    """
    tcl_lines: List[str] = [
        *[f"read_liberty {_tcl_quote(lib)}" for lib in liberties],
        f"read_db {_tcl_quote(odb)}",
    ]
    if sdc.exists():
        tcl_lines.append(f"read_sdc {_tcl_quote(sdc)}")
    tcl_lines.append(
        f"set_dft_config -max_chains {max_chains} -clock_mixing {clock_mixing}"
    )
    if do_scan_replace:
        tcl_lines.append("scan_replace")
    tcl_lines += [
        "report_dft_plan -verbose",
        f"write_def {_tcl_quote(out_def)}",
        "exit",
    ]

    tcl_file = tempfile.NamedTemporaryFile(
        mode="w",
        prefix="scan_chain_cost_",
        suffix=".tcl",
        delete=False,
        dir=os.getcwd(),
    )
    tcl_path = Path(tcl_file.name)
    # The write happens inside the try so the script is removed even if
    # writing it fails.
    try:
        with tcl_file:
            tcl_file.write("\n".join(tcl_lines) + "\n")

        proc = subprocess.run(
            [str(openroad_exe), "-exit", str(tcl_path)],
            cwd=os.getcwd(),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            check=False,
        )
        if proc.returncode != 0:
            raise RuntimeError(
                f"OpenROAD failed (exit {proc.returncode}). Output:\n{proc.stdout}"
            )
        if verbose:
            print(proc.stdout, end="")
        return proc.stdout
    finally:
        try:
            tcl_path.unlink()
        except FileNotFoundError:
            pass


def parse_report_dft_plan_verbose(openroad_output: str) -> Dict[str, List[str]]:
    """Extract ``{chain_name: [instance, ...]}`` from the plan report text.

    A chain starts at a "Scan chain '<name>' has <n> cells" line; each
    following indented line contributes its first token as an instance name.
    Indented lines that contain only whitespace are ignored.
    """
    chains: Dict[str, List[str]] = {}
    current: Optional[str] = None

    for line in openroad_output.splitlines():
        header = _CHAIN_HEADER_RE.match(line)
        if header:
            current = header.group(1)
            chains[current] = []
            continue
        if current is None or not line.startswith(" "):
            continue
        tokens = line.split()
        if tokens:  # whitespace-only lines have no instance token
            chains[current].append(tokens[0])

    return chains


def parse_def_units_and_coords(
    def_path: Path, needed_insts: Iterable[str]
) -> Tuple[Optional[int], Dict[str, Tuple[int, int]]]:
    """Read DBU-per-micron and the origins of *needed_insts* from a DEF file.

    Returns ``(units, coords)`` where ``units`` is ``None`` if no UNITS
    statement was seen and ``coords`` maps each found instance to ``(x, y)``
    in DBU. Instances without a PLACED/FIXED location are omitted.
    """
    needed = set(needed_insts)
    coords: Dict[str, Tuple[int, int]] = {}
    units: Optional[int] = None

    in_components = False
    component_buf: List[str] = []

    with def_path.open() as f:
        for line in f:
            if units is None:
                m = _DEF_UNITS_RE.match(line)
                if m:
                    units = int(m.group(1))

            stripped = line.lstrip()
            if stripped.startswith("COMPONENTS"):
                in_components = True
                continue
            if stripped.startswith("END COMPONENTS"):
                in_components = False
                if len(coords) == len(needed):
                    break
                continue
            if not in_components:
                continue

            # A component statement may span several lines; accumulate
            # until the terminating ';'.
            if not component_buf:
                if not stripped.startswith("-"):
                    continue
                component_buf = [stripped.rstrip("\n")]
            else:
                component_buf.append(stripped.rstrip("\n"))

            if ";" not in stripped:
                continue

            component = " ".join(component_buf)
            component_buf = []

            # Statement shape: "- <instance> <master> ... ;"
            tokens = component.split()
            if len(tokens) < 3 or tokens[0] != "-":
                continue
            inst_name = tokens[1]
            if inst_name not in needed:
                continue

            m = _DEF_PLACE_RE.search(component)
            if not m:
                continue
            coords[inst_name] = (int(m.group(1)), int(m.group(2)))
            if len(coords) == len(needed):
                break  # everything found; skip the rest of the file

    return units, coords


def manhattan_path_dbu(order: Sequence[str], coords: Dict[str, Tuple[int, int]]) -> int:
    """Return the summed Manhattan distance (DBU) of visiting *order* in sequence."""
    total = 0
    last_xy: Optional[Tuple[int, int]] = None
    for inst in order:
        xy = coords[inst]
        if last_xy is not None:
            total += abs(xy[0] - last_xy[0]) + abs(xy[1] - last_xy[1])
        last_xy = xy
    return total


def nearest_neighbor_manhattan_path_dbu(
    order: Sequence[str], coords: Dict[str, Tuple[int, int]], *, start: Optional[str] = None
) -> int:
    """Return the path length (DBU) of a greedy nearest-neighbor tour.

    The tour begins at *start* (default: ``order[0]``) and repeatedly moves to
    the closest unvisited instance; ties are broken by instance name.

    Raises:
        KeyError: if *start* (or any instance in *order*) has no coordinates.
    """
    if not order:
        return 0
    if start is None:
        start = order[0]
    if start not in coords:
        raise KeyError(start)

    remaining = set(order)
    # discard (not remove): a start point outside `order` is still a valid origin.
    remaining.discard(start)
    cur = start
    total = 0

    while remaining:
        cx, cy = coords[cur]

        def key(inst: str) -> Tuple[int, str]:
            x, y = coords[inst]
            return (abs(x - cx) + abs(y - cy), inst)

        nxt = min(remaining, key=key)
        x, y = coords[nxt]
        total += abs(x - cx) + abs(y - cy)
        remaining.remove(nxt)
        cur = nxt

    return total


def compute_chain_metrics(
    chain_name: str,
    order: Sequence[str],
    coords: Dict[str, Tuple[int, int]],
    units: Optional[int],
    compute_nearest_neighbor: bool,
) -> ChainMetrics:
    """Compute the metrics of one chain.

    Micron values need *units*; averages and comparison baselines additionally
    need at least two cells; ratios need a non-zero denominator. Anything that
    cannot be computed is ``None``.
    """
    manhattan_dbu = manhattan_path_dbu(order, coords)
    manhattan_um = (manhattan_dbu / units) if units else None
    comparable = bool(units) and len(order) > 1

    avg_step_um: Optional[float] = None
    naive_lex_manhattan_um: Optional[float] = None
    naive_lex_ratio: Optional[float] = None
    if comparable:
        avg_step_um = (manhattan_dbu / units) / (len(order) - 1)
        # Baseline: visit the same cells in lexicographic name order.
        naive_lex_dbu = manhattan_path_dbu(sorted(order), coords)
        naive_lex_manhattan_um = naive_lex_dbu / units
        naive_lex_ratio = naive_lex_dbu / manhattan_dbu if manhattan_dbu else None

    nearest_neighbor_manhattan_um: Optional[float] = None
    openroad_over_nn_ratio: Optional[float] = None
    if compute_nearest_neighbor and comparable:
        nn_dbu = nearest_neighbor_manhattan_path_dbu(order, coords, start=order[0])
        nearest_neighbor_manhattan_um = nn_dbu / units
        openroad_over_nn_ratio = (manhattan_dbu / nn_dbu) if nn_dbu else None

    return ChainMetrics(
        name=chain_name,
        cells=len(order),
        manhattan_dbu=manhattan_dbu,
        manhattan_um=manhattan_um,
        avg_step_um=avg_step_um,
        naive_lex_manhattan_um=naive_lex_manhattan_um,
        naive_lex_ratio=naive_lex_ratio,
        nearest_neighbor_manhattan_um=nearest_neighbor_manhattan_um,
        openroad_over_nn_ratio=openroad_over_nn_ratio,
    )


def _fmt_optional(value: Optional[float]) -> str:
    """Format a metric with 3 decimals, or ``n/a`` when it is ``None``."""
    return "n/a" if value is None else f"{value:.3f}"


def format_chain_summary(m: ChainMetrics) -> str:
    """Return the one-line human-readable summary printed for a chain.

    Metrics that are ``None`` (single-cell or zero-length chains) print as
    ``n/a`` instead of raising while formatting.
    """
    if m.manhattan_um is None:
        return f"{m.name}: cells={m.cells} manhattan_dbu={m.manhattan_dbu}"

    text = (
        f"{m.name}: cells={m.cells} "
        f"manhattan_um={m.manhattan_um:.3f} "
        f"avg_step_um={_fmt_optional(m.avg_step_um)} "
        f"naive_lex_um={_fmt_optional(m.naive_lex_manhattan_um)} "
        f"naive_lex_ratio={_fmt_optional(m.naive_lex_ratio)}"
    )
    if (
        m.nearest_neighbor_manhattan_um is not None
        and m.openroad_over_nn_ratio is not None
    ):
        text += (
            f" nn_um={m.nearest_neighbor_manhattan_um:.3f} "
            f"openroad_over_nn={m.openroad_over_nn_ratio:.3f}"
        )
    return text


def main() -> int:
    """CLI entry point; returns 0 on success, 2 if no scan chain was reported."""
    parser = argparse.ArgumentParser(
        description=(
            "Compute a TSP-like scan-chain length metric from OpenROAD's "
            "`report_dft_plan -verbose` output and instance origins."
        )
    )
    parser.add_argument("--openroad", required=True, type=Path)
    parser.add_argument(
        "--liberty",
        required=True,
        type=Path,
        action="append",
        help="Liberty file to load (repeatable).",
    )
    parser.add_argument("--odb", required=True, type=Path)
    parser.add_argument(
        "--sdc",
        required=True,
        type=Path,
        help="Used so `report_dft_plan` can infer clock domains; still runs if missing.",
    )
    parser.add_argument("--max-chains", type=int, default=1)
    parser.add_argument("--clock-mixing", default="clock_mix")
    parser.add_argument(
        "--scan-replace",
        action="store_true",
        help="Run `scan_replace` before reporting the plan (useful for no-DFT ODBs).",
    )
    parser.add_argument(
        "--out-json",
        type=Path,
        default=None,
        help="Write machine-readable metrics JSON to this path.",
    )
    parser.add_argument(
        "--verbose-openroad",
        action="store_true",
        help="Print the full OpenROAD output (includes the full chain listing).",
    )
    parser.add_argument(
        "--nearest-neighbor",
        action="store_true",
        help="Compute a simple nearest-neighbor TSP heuristic for comparison.",
    )
    args = parser.parse_args()

    openroad_exe = args.openroad.resolve()
    liberties = [p.resolve() for p in args.liberty]
    odb = args.odb.resolve()
    sdc = args.sdc.resolve()

    # The SDC is deliberately not checked: the run proceeds without it.
    for path in (openroad_exe, odb, *liberties):
        if not path.exists():
            raise FileNotFoundError(path)

    with tempfile.TemporaryDirectory(prefix="scan_chain_cost_", dir=os.getcwd()) as td:
        out_def = Path(td) / "design.def"

        openroad_output = run_openroad_plan(
            openroad_exe=openroad_exe,
            liberties=liberties,
            odb=odb,
            sdc=sdc,
            out_def=out_def,
            max_chains=args.max_chains,
            clock_mixing=args.clock_mixing,
            do_scan_replace=args.scan_replace,
            verbose=args.verbose_openroad,
        )

        chains = parse_report_dft_plan_verbose(openroad_output)
        if not chains:
            print("No scan chains found in `report_dft_plan -verbose` output.")
            return 2

        needed = [inst for order in chains.values() for inst in order]
        # The DEF lives in the temporary directory, so parse it before leaving.
        units, coords = parse_def_units_and_coords(out_def, needed)

    missing = [inst for inst in needed if inst not in coords]
    if missing:
        raise RuntimeError(
            f"Missing {len(missing)}/{len(needed)} chain instances in DEF output. "
            f"First missing: {missing[0]}"
        )

    metrics = [
        compute_chain_metrics(
            name,
            order,
            coords,
            units,
            compute_nearest_neighbor=args.nearest_neighbor,
        )
        for name, order in chains.items()
    ]
    metrics.sort(key=lambda m: m.name)

    total_um = (
        sum(m.manhattan_um for m in metrics if m.manhattan_um is not None)
        if units
        else None
    )

    print(f"Chains: {len(metrics)}")
    if units:
        print(f"DEF units: {units} DBU per micron")
    for m in metrics:
        print(format_chain_summary(m))
    if total_um is not None:
        print(f"total_manhattan_um={total_um:.3f}")

    if args.out_json:
        payload = {
            "units_dbu_per_micron": units,
            "chains": [asdict(m) for m in metrics],
            "total_manhattan_um": total_um,
        }
        args.out_json.parent.mkdir(parents=True, exist_ok=True)
        args.out_json.write_text(json.dumps(payload, indent=2, sort_keys=True))

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
#!/usr/bin/env python3
"""Validate single-chain scan stitching in a gate-level Verilog netlist.

The netlist is either given directly or written by OpenROAD from an ODB.
"""

import argparse
import json
import os
import re
import subprocess
import tempfile
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Set, Tuple


SCAN_IN_PINS = ("SI", "SCD", "SCANIN", "SCAN_IN", "SCAN_DATA_IN")
SCAN_ENABLE_PINS = ("SE", "SCE", "SCAN_EN", "SCAN_ENABLE", "SCANENABLE")
SCAN_OUT_PINS = ("SO", "SCO", "SCANOUT", "SCAN_OUT", "SCAN_DATA_OUT")
FALLBACK_OUT_PINS = ("Q", "QN")
PASS_THROUGH_CELL_PREFIXES = ("BUF", "CLKBUF", "INV")

_ASSIGN_RE = re.compile(r"^\s*assign\s+(\S+)\s*=\s*(\S+)\s*;\s*$")
_INST_START_RE = re.compile(r"^\s*(\S+)\s+(\S+)\s*\(")
# `.PORT(net)` where `net` is treated as a single Verilog token.
_PORT_RE = re.compile(r"\.(\w+)\(\s*([^\)\s]+)\s*\)")
_NON_INSTANCE_KEYWORDS = ("module", "assign", "endmodule")


@dataclass(frozen=True)
class ScanCell:
    """One instance that has both a scan-in and a scan-enable pin connected."""

    name: str
    scan_in_pin: str
    scan_in_net: str
    scan_enable_pin: str
    scan_enable_net: str
    port_nets: Dict[str, str]


@dataclass(frozen=True)
class ValidationSummary:
    """Result of validating a netlist; ``errors`` is empty on success."""

    scan_cells_found: int
    chains_found: int
    chain_cells: int
    start_cell: Optional[str]
    end_cell: Optional[str]
    scan_enable_net: Optional[str]
    scan_out_source_net: Optional[str]
    broken_links: int
    orphan_cells: int
    errors: List[str]


def _tcl_quote(path: Path) -> str:
    """Brace-quote *path* so Tcl treats it as one literal word."""
    return "{" + str(path) + "}"


def _normalize_verilog_ident(token: str) -> str:
    """Strip whitespace and the leading backslash of an escaped identifier."""
    token = token.strip()
    if token.startswith("\\"):
        token = token[1:]
    return token


def _strip_trailing_delims(token: str) -> str:
    """Drop trailing ``,`` / ``;`` characters from *token*."""
    return token.rstrip(",;")


def _extract_simple_assignments(lines: Iterable[str]) -> Dict[str, str]:
    """Return ``{lhs: rhs}`` for lines of the exact form ``assign lhs = rhs;``."""
    assigns: Dict[str, str] = {}
    for line in lines:
        m = _ASSIGN_RE.match(line)
        if not m:
            continue
        lhs = _normalize_verilog_ident(_strip_trailing_delims(m.group(1)))
        rhs = _normalize_verilog_ident(_strip_trailing_delims(m.group(2)))
        assigns[lhs] = rhs
    return assigns


def _resolve_alias(assigns: Dict[str, str], net: str) -> str:
    """Follow ``net -> assigns[net]`` links to the root; cycles terminate."""
    cur = net
    seen: Set[str] = set()
    while cur in assigns and cur not in seen:
        seen.add(cur)
        cur = assigns[cur]
    return cur


def _is_pass_through_cell(cell_type: str) -> bool:
    """Return True if *cell_type* starts with a buffer/inverter prefix."""
    return cell_type.startswith(PASS_THROUGH_CELL_PREFIXES)


def _iter_instances(lines: Iterable[str]) -> Iterator[Tuple[str, str, str]]:
    """Yield ``(cell_type, instance_name, joined_text)`` per instantiation.

    An instance starts on a line shaped like ``<type> <name> (`` and ends on
    the first line containing ``);``. An unterminated trailing instance is
    still yielded.
    """
    inst_type: Optional[str] = None
    inst_name: Optional[str] = None
    buf: List[str] = []

    for line in lines:
        if inst_name is None:
            m = _INST_START_RE.match(line)
            if not m or m.group(1) in _NON_INSTANCE_KEYWORDS:
                continue
            inst_type, inst_name = m.group(1), m.group(2)
            buf = [line]
        else:
            buf.append(line)

        if ");" in line:
            yield inst_type, inst_name, " ".join(buf)
            inst_type = inst_name = None
            buf = []

    if inst_name is not None:
        yield inst_type, inst_name, " ".join(buf)


def parse_scan_cells_from_verilog(
    verilog_path: Path,
) -> Tuple[List[ScanCell], Dict[str, str], Dict[str, str]]:
    """Parse *verilog_path* into ``(scan_cells, assigns, driven_by)``.

    ``assigns`` holds the simple ``assign`` aliases. ``driven_by`` extends it
    with ``output_net -> input_net`` for pass-through cells (pins A/I to Z/ZN)
    so that connectivity can be traced through inserted buffers.

    Raises:
        RuntimeError: if a net has two different pass-through drivers.
    """
    with verilog_path.open() as f:
        lines = f.readlines()

    assigns = _extract_simple_assignments(lines)
    driven_by: Dict[str, str] = dict(assigns)
    scan_cells: List[ScanCell] = []

    for inst_type, inst_name, text in _iter_instances(lines):
        port_nets = {
            m.group(1): _normalize_verilog_ident(_strip_trailing_delims(m.group(2)))
            for m in _PORT_RE.finditer(text)
        }

        scan_in_pin = next((p for p in SCAN_IN_PINS if p in port_nets), None)
        scan_enable_pin = next((p for p in SCAN_ENABLE_PINS if p in port_nets), None)
        if scan_in_pin and scan_enable_pin:
            scan_cells.append(
                ScanCell(
                    name=_normalize_verilog_ident(inst_name),
                    scan_in_pin=scan_in_pin,
                    scan_in_net=port_nets[scan_in_pin],
                    scan_enable_pin=scan_enable_pin,
                    scan_enable_net=port_nets[scan_enable_pin],
                    port_nets=port_nets,
                )
            )
        elif _is_pass_through_cell(inst_type):
            # Collapse simple pass-through combinational instances so we can
            # validate scan connectivity even after buffer insertion/resizing.
            in_net = port_nets.get("A") or port_nets.get("I")
            out_net = port_nets.get("Z") or port_nets.get("ZN")
            if in_net and out_net:
                existing = driven_by.get(out_net)
                if existing and existing != in_net:
                    raise RuntimeError(
                        f"Net '{out_net}' appears to have multiple pass-through drivers: "
                        f"'{existing}' and '{in_net}'."
                    )
                driven_by[out_net] = in_net

    return scan_cells, assigns, driven_by


def _cell_output_nets_for_stitching(cell: ScanCell) -> List[str]:
    """Return the distinct nets on the cell's scan-out pins, then Q/QN."""
    nets: List[str] = []
    for pin in SCAN_OUT_PINS + FALLBACK_OUT_PINS:
        net = cell.port_nets.get(pin)
        if net and net not in nets:
            nets.append(net)
    return nets


def reconstruct_single_chain(
    scan_cells: Sequence[ScanCell],
    *,
    scan_in_net: str,
    scan_out_source_net: str,
    driven_by: Dict[str, str],
) -> Tuple[List[str], List[str], int]:
    """Walk the chain from *scan_in_net* and check it ends at scan-out.

    Returns ``(chain, errors, broken_links)`` where ``chain`` lists the
    visited cell names in order. The walk stops at the first problem (loop,
    ambiguous or fan-out link, end-of-chain mismatch); unvisited scan cells
    are reported as orphans.
    """
    name_to_cell = {c.name: c for c in scan_cells}

    # Group cells by the root net that ultimately drives their scan-in pin.
    si_root_to_cells: Dict[str, List[str]] = {}
    for c in scan_cells:
        si_root = _resolve_alias(driven_by, c.scan_in_net)
        si_root_to_cells.setdefault(si_root, []).append(c.name)

    errors: List[str] = []
    broken_links = 0

    start_candidates = si_root_to_cells.get(scan_in_net, [])
    if len(start_candidates) != 1:
        errors.append(
            f"Expected exactly 1 scan cell driven by {scan_in_net} on scan-in; "
            f"found {len(start_candidates)}."
        )
        return [], errors, broken_links

    scan_out_root = _resolve_alias(driven_by, scan_out_source_net)
    chain: List[str] = []
    visited: Set[str] = set()

    cur_name = start_candidates[0]
    while True:
        if cur_name in visited:
            errors.append(f"Loop detected at scan cell '{cur_name}'.")
            break
        visited.add(cur_name)
        chain.append(cur_name)

        out_candidates = _cell_output_nets_for_stitching(name_to_cell[cur_name])
        next_nets = [n for n in out_candidates if n in si_root_to_cells]

        if len(next_nets) > 1:
            errors.append(
                f"Ambiguous scan stitch: cell '{cur_name}' has multiple outputs feeding scan inputs: "
                f"{', '.join(next_nets[:8])}{'...' if len(next_nets) > 8 else ''}"
            )
            broken_links += 1
            break

        if len(next_nets) == 1:
            next_net = next_nets[0]
            dst_cells = si_root_to_cells.get(next_net, [])
            if len(dst_cells) != 1:
                errors.append(
                    f"Expected net '{next_net}' to feed exactly 1 scan cell SI; found {len(dst_cells)}."
                )
                broken_links += 1
                break
            cur_name = dst_cells[0]
            continue

        # No next: treat as end-of-chain and validate scan-out.
        if scan_out_root not in out_candidates:
            errors.append(
                f"End-of-chain mismatch: last cell '{cur_name}' does not drive scan_out "
                f"(expected '{scan_out_root}'; outputs are {', '.join(out_candidates)})."
            )
            broken_links += 1
        break

    orphan_cells = len(scan_cells) - len(visited)
    if orphan_cells:
        errors.append(
            f"Orphan scan cells: visited {len(visited)}/{len(scan_cells)}; {orphan_cells} not in chain."
        )

    return chain, errors, broken_links


def run_openroad_write_verilog(
    *,
    openroad_exe: Path,
    liberties: Sequence[Path],
    odb: Path,
    sdc: Optional[Path],
    out_verilog: Path,
    max_chains: int,
    clock_mixing: str,
    do_scan_replace: bool,
    do_execute_dft_plan: bool,
    ensure_ports: bool,
    verbose: bool,
) -> str:
    """Run OpenROAD on *odb* and write a Verilog netlist to *out_verilog*.

    Optionally creates the scan ports, runs `scan_replace` and/or
    `execute_dft_plan` first. Returns OpenROAD's combined stdout/stderr.

    Raises:
        RuntimeError: if OpenROAD exits with a non-zero status.
    """
    tcl_lines: List[str] = [
        *[f"read_liberty {_tcl_quote(lib)}" for lib in liberties],
        f"read_db {_tcl_quote(odb)}",
    ]
    if sdc and sdc.exists():
        tcl_lines.append(f"read_sdc {_tcl_quote(sdc)}")

    if ensure_ports:
        tcl_lines += [
            "proc dft_ensure_scan_port {port_name io_type} {",
            "  set block [ord::get_db_block]",
            "  set bterm [$block findBTerm $port_name]",
            "  if { $bterm != \"NULL\" } {",
            "    return",
            "  }",
            "  set net [$block findNet $port_name]",
            "  if { $net == \"NULL\" } {",
            "    set net [odb::dbNet_create $block $port_name]",
            "    $net setSigType SCAN",
            "  }",
            "  set bterm [odb::dbBTerm_create $net $port_name]",
            "  $bterm setSigType SCAN",
            "  $bterm setIoType $io_type",
            "}",
            "dft_ensure_scan_port \"scan_enable_0\" INPUT",
            "dft_ensure_scan_port \"scan_in_0\" INPUT",
            "dft_ensure_scan_port \"scan_out_0\" OUTPUT",
        ]

    tcl_lines.append(
        f"set_dft_config -max_chains {max_chains} -clock_mixing {clock_mixing}"
    )
    if do_scan_replace:
        tcl_lines.append("scan_replace")
    if do_execute_dft_plan:
        tcl_lines.append("execute_dft_plan")

    tcl_lines += [
        f"write_verilog {_tcl_quote(out_verilog)}",
        "exit",
    ]

    tcl_file = tempfile.NamedTemporaryFile(
        mode="w",
        prefix="scan_chain_validate_",
        suffix=".tcl",
        delete=False,
        dir=os.getcwd(),
    )
    tcl_path = Path(tcl_file.name)
    # The write happens inside the try so the script is removed even if
    # writing it fails.
    try:
        with tcl_file:
            tcl_file.write("\n".join(tcl_lines) + "\n")

        proc = subprocess.run(
            [str(openroad_exe), "-exit", str(tcl_path)],
            cwd=os.getcwd(),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            check=False,
        )
        if proc.returncode != 0:
            raise RuntimeError(
                f"OpenROAD failed (exit {proc.returncode}). Output:\n{proc.stdout}"
            )
        if verbose:
            print(proc.stdout, end="")
        return proc.stdout
    finally:
        try:
            tcl_path.unlink()
        except FileNotFoundError:
            pass


def validate_netlist(
    verilog_path: Path,
    *,
    scan_in: str,
    scan_out: str,
    scan_enable: str,
) -> ValidationSummary:
    """Validate the scan chain of *verilog_path* against the named scan ports.

    Checks that exactly one chain runs from *scan_in* to the net feeding
    *scan_out*, and that all scan cells share the *scan_enable* net.
    """
    scan_cells, assigns, driven_by = parse_scan_cells_from_verilog(verilog_path)

    scan_out_source = _resolve_alias(assigns, scan_out)
    scan_in_net = _normalize_verilog_ident(scan_in)
    scan_out_net = _normalize_verilog_ident(scan_out_source)
    scan_enable_net = _normalize_verilog_ident(scan_enable)

    chain, errors, broken_links = reconstruct_single_chain(
        scan_cells,
        scan_in_net=scan_in_net,
        scan_out_source_net=scan_out_net,
        driven_by=driven_by,
    )

    enable_roots = {_resolve_alias(driven_by, c.scan_enable_net) for c in scan_cells}
    enable_net_value = next(iter(enable_roots)) if len(enable_roots) == 1 else None

    if not scan_cells:
        # Nothing to compare; the missing chain is already reported above and
        # a "not uniform" message over an empty set would be misleading.
        pass
    elif enable_net_value is None:
        errors.append(
            f"Scan enable is not uniform across scan cells: {sorted(enable_roots)[:8]}"
            f"{'...' if len(enable_roots) > 8 else ''}"
        )
    elif enable_net_value != scan_enable_net:
        errors.append(
            f"Scan enable net mismatch: scan cells use '{enable_net_value}', expected '{scan_enable_net}'."
        )

    return ValidationSummary(
        scan_cells_found=len(scan_cells),
        chains_found=1 if chain else 0,
        chain_cells=len(chain),
        start_cell=chain[0] if chain else None,
        end_cell=chain[-1] if chain else None,
        scan_enable_net=enable_net_value,
        scan_out_source_net=_resolve_alias(driven_by, scan_out_net),
        broken_links=broken_links,
        orphan_cells=max(0, len(scan_cells) - len(chain)),
        errors=errors,
    )


def report_summary(summary: ValidationSummary, out_json: Optional[Path] = None) -> int:
    """Print *summary*, optionally write it as JSON, and return the exit code.

    At most the first 50 errors are printed. Returns 0 when there are no
    errors, otherwise 2.
    """
    print(f"scan_cells_found={summary.scan_cells_found}")
    print(f"chains_found={summary.chains_found}")
    print(f"chain_cells={summary.chain_cells}")
    if summary.start_cell:
        print(f"start_cell={summary.start_cell}")
    if summary.end_cell:
        print(f"end_cell={summary.end_cell}")
    if summary.scan_out_source_net:
        print(f"scan_out_source_net={summary.scan_out_source_net}")
    if summary.scan_enable_net:
        print(f"scan_enable_net={summary.scan_enable_net}")
    print(f"broken_links={summary.broken_links}")
    print(f"orphan_cells={summary.orphan_cells}")
    for e in summary.errors[:50]:
        print(f"ERROR: {e}")

    if out_json:
        out_json.parent.mkdir(parents=True, exist_ok=True)
        out_json.write_text(json.dumps(asdict(summary), indent=2, sort_keys=True))

    return 0 if not summary.errors else 2


def main() -> int:
    """CLI entry point; returns 0 if the netlist validates, otherwise 2."""
    parser = argparse.ArgumentParser(
        description="Validate scan-chain stitching correctness from a gate-level Verilog netlist."
    )
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument("--verilog", type=Path, help="Gate-level Verilog to validate.")
    input_group.add_argument("--odb", type=Path, help="Write Verilog from this ODB first.")

    parser.add_argument("--openroad", type=Path, help="Required with --odb.")
    parser.add_argument(
        "--liberty",
        type=Path,
        action="append",
        help="Liberty file to load (repeatable; required with --odb).",
    )
    parser.add_argument("--sdc", type=Path, default=None, help="Optional; used with --odb.")
    parser.add_argument("--max-chains", type=int, default=1)
    parser.add_argument("--clock-mixing", default="clock_mix")
    parser.add_argument("--scan-replace", action="store_true")
    parser.add_argument("--execute-dft-plan", action="store_true")
    parser.add_argument(
        "--ensure-ports",
        action="store_true",
        help="Create scan ports if missing (idempotent); only used with --odb.",
    )
    parser.add_argument("--scan-in", default="scan_in_0")
    parser.add_argument("--scan-out", default="scan_out_0")
    parser.add_argument("--scan-enable", default="scan_enable_0")
    parser.add_argument("--out-json", type=Path, default=None)
    parser.add_argument("--verbose-openroad", action="store_true")
    args = parser.parse_args()

    if args.verilog:
        # --verilog path (no OpenROAD)
        verilog_path = args.verilog.resolve()
        if not verilog_path.exists():
            raise FileNotFoundError(verilog_path)
        summary = validate_netlist(
            verilog_path,
            scan_in=args.scan_in,
            scan_out=args.scan_out,
            scan_enable=args.scan_enable,
        )
        return report_summary(summary, args.out_json)

    odb = args.odb.resolve()
    if not odb.exists():
        raise FileNotFoundError(odb)
    if not args.openroad or not args.liberty:
        raise ValueError("--openroad and at least one --liberty are required with --odb.")
    openroad_exe = args.openroad.resolve()
    liberties = [p.resolve() for p in args.liberty]
    if not openroad_exe.exists():
        raise FileNotFoundError(openroad_exe)
    for lib in liberties:
        if not lib.exists():
            raise FileNotFoundError(lib)

    with tempfile.TemporaryDirectory(prefix="scan_chain_validate_", dir=os.getcwd()) as td:
        verilog_path = Path(td) / "design.v"

        run_openroad_write_verilog(
            openroad_exe=openroad_exe,
            liberties=liberties,
            odb=odb,
            sdc=args.sdc.resolve() if args.sdc else None,
            out_verilog=verilog_path,
            max_chains=args.max_chains,
            clock_mixing=args.clock_mixing,
            do_scan_replace=args.scan_replace,
            do_execute_dft_plan=args.execute_dft_plan,
            ensure_ports=args.ensure_ports,
            verbose=args.verbose_openroad,
        )

        # Validate while the temporary netlist still exists.
        summary = validate_netlist(
            verilog_path,
            scan_in=args.scan_in,
            scan_out=args.scan_out,
            scan_enable=args.scan_enable,
        )

    return report_summary(summary, args.out_json)


if __name__ == "__main__":
    raise SystemExit(main())