Skip to content

Commit 744b513

Browse files
authored
Merge pull request #9215 from The-OpenROAD-Project-staging/secure-deploy-insert-buffer
rsz: Deployed insert buffer for gain buffering
2 parents b9da01c + 56acd15 commit 744b513

File tree

12 files changed

+416
-133
lines changed

12 files changed

+416
-133
lines changed

src/odb/src/db/dbInsertBuffer.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ dbInst* dbInsertBuffer::insertBufferSimple(dbObject* term_obj,
128128
// 4. Rewire
129129
rewireBufferSimple(insertBefore, orig_mod_net, term_obj);
130130

131-
// 5. Place the new buffer
132131
// 5. Place the new buffer
133132
if (loc) {
134133
placeBufferAtLocation(buffer_inst, *loc);
@@ -192,7 +191,6 @@ dbInst* dbInsertBuffer::insertBufferBeforeLoads(
192191
populateReusableModNets(load_pins);
193192
rewireBufferLoadPins(load_pins);
194193

195-
// 5. Place the Buffer
196194
// 5. Place the Buffer
197195
if (loc) {
198196
placeBufferAtLocation(buffer_inst, *loc);
@@ -314,21 +312,23 @@ bool dbInsertBuffer::checkDontTouch(const dbITerm* iterm) const
314312
}
315313

316314
void dbInsertBuffer::placeBufferAtLocation(dbInst* buffer_inst,
317-
const Point& loc)
315+
const Point& loc,
316+
const char* reason)
318317
{
319318
buffer_inst->setLocation(loc.getX(), loc.getY());
320319
buffer_inst->setPlacementStatus(dbPlacementStatus::PLACED);
321-
dlogPlacedBuffer(buffer_inst, loc);
320+
dlogPlacedBuffer(buffer_inst, loc, reason);
322321
}
323322

324323
void dbInsertBuffer::placeBufferAtPin(dbInst* buffer_inst, const dbObject* term)
325324
{
326325
int x = 0;
327326
int y = 0;
328327
if (getPinLocation(term, x, y)) {
329-
placeBufferAtLocation(buffer_inst, Point(x, y));
328+
placeBufferAtLocation(buffer_inst, Point(x, y), "pin location");
330329
} else {
331330
buffer_inst->setPlacementStatus(dbPlacementStatus::UNPLACED);
331+
dlogUnplacedBuffer(buffer_inst, "pin location not available");
332332
}
333333
}
334334

@@ -1483,9 +1483,11 @@ void dbInsertBuffer::placeBufferAtCentroid(dbInst* buffer_inst,
14831483
{
14841484
Point placement_loc;
14851485
if (computeCentroid(drvr_pin, load_pins, placement_loc)) {
1486-
placeBufferAtLocation(buffer_inst, placement_loc);
1486+
placeBufferAtLocation(buffer_inst, placement_loc, "centroid");
14871487
} else {
14881488
buffer_inst->setPlacementStatus(dbPlacementStatus::UNPLACED);
1489+
dlogUnplacedBuffer(buffer_inst,
1490+
"centroid computation failed (no placed pins)");
14891491
}
14901492
}
14911493

@@ -1554,7 +1556,7 @@ void dbInsertBuffer::dlogLCAModule(const dbModule* target_module) const
15541556
"BeforeLoads: LCA module: {} '{}'",
15551557
target_module ? target_module->getName() : "null_module",
15561558
target_mod_inst ? target_mod_inst->getHierarchicalName()
1557-
: "<null_modinst_or_top>");
1559+
: "<top_or_null_mod_inst>");
15581560
}
15591561
}
15601562

@@ -1677,16 +1679,30 @@ void dbInsertBuffer::dlogMovedBTermLoad(int load_idx,
16771679
}
16781680

16791681
void dbInsertBuffer::dlogPlacedBuffer(const dbInst* buffer_inst,
1680-
const Point& loc) const
1682+
const Point& loc,
1683+
const char* reason) const
16811684
{
16821685
debugPrint(logger_,
16831686
utl::ODB,
16841687
"insert_buffer",
16851688
1,
1686-
"Placed the new buffer '{}' at ({}, {})",
1689+
"Placed the new buffer '{}' at ({}, {}) using {}",
16871690
buffer_inst->getName(),
16881691
loc.getX(),
1689-
loc.getY());
1692+
loc.getY(),
1693+
reason);
1694+
}
1695+
1696+
void dbInsertBuffer::dlogUnplacedBuffer(const dbInst* buffer_inst,
1697+
const char* reason) const
1698+
{
1699+
debugPrint(logger_,
1700+
utl::ODB,
1701+
"insert_buffer",
1702+
1,
1703+
"Buffer '{}' set to UNPLACED: {}",
1704+
buffer_inst->getName(),
1705+
reason);
16901706
}
16911707

16921708
void dbInsertBuffer::dlogInsertBufferSuccess(const dbInst* buffer_inst) const

src/odb/src/db/dbInsertBuffer.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,9 @@ class dbInsertBuffer
9292
bool computeCentroid(const dbObject* drvr_pin,
9393
const std::set<dbObject*>& load_pins,
9494
Point& result) const;
95-
void placeBufferAtLocation(dbInst* buffer_inst, const Point& loc);
95+
void placeBufferAtLocation(dbInst* buffer_inst,
96+
const Point& loc,
97+
const char* reason = "argument");
9698
void placeBufferAtPin(dbInst* buffer_inst, const dbObject* term);
9799
void placeBufferAtCentroid(dbInst* buffer_inst,
98100
const dbObject* drvr_pin,
@@ -202,7 +204,10 @@ class dbInsertBuffer
202204
void dlogMovedBTermLoad(int load_idx,
203205
int num_loads,
204206
const dbBTerm* load) const;
205-
void dlogPlacedBuffer(const dbInst* buffer_inst, const Point& loc) const;
207+
void dlogPlacedBuffer(const dbInst* buffer_inst,
208+
const Point& loc,
209+
const char* reason) const;
210+
void dlogUnplacedBuffer(const dbInst* buffer_inst, const char* reason) const;
206211
void dlogInsertBufferSuccess(const dbInst* buffer_inst) const;
207212
void dlogInsertBufferStart(int count, const char* mode) const;
208213
void dlogSeparator() const;

src/rsz/src/RepairDesign.cc

Lines changed: 53 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,30 @@ void RepairDesign::findBufferSizes()
649649
});
650650
}
651651

652+
/// Gain buffering: Make a buffer tree to satisfy fanout and cap constraints
653+
///
654+
/// Purpose: Reduce fanout count and total loading capacitance.
655+
///
656+
/// @param net The net to be buffered
657+
/// @param drvr_pin The driver pin of the net
658+
/// @param max_fanout Maximum fanout allowed per buffer (gain buffering
659+
/// constraint)
660+
///
661+
/// Algorithm:
662+
/// 1. Sinks are collected and sorted by timing criticality (required time).
663+
/// 2. Sinks are grouped based on driving cell's max fanout and max buffer
664+
/// load capacity (e.g., 9 * input cap of the largest buffer).
665+
/// 3. Insert the smallest buffer satisfying the following criterion.
666+
/// "buffer_input_cap > accumulated_load_cap / gain ratio"
667+
/// 4. The new buffer's input is treated as a new sink and re-added to the
668+
/// queue to recursively build the tree.
669+
/// 5. Incremental timing update.
670+
///
671+
/// Why use required time instead of slack?
672+
/// - Arrival times change as the buffer tree is built, while required time
673+
/// does not change. So required time is a more stable metric for bottom-up
674+
/// construction and critical path isolation.
675+
///
652676
bool RepairDesign::performGainBuffering(Net* net,
653677
const Pin* drvr_pin,
654678
int max_fanout)
@@ -701,7 +725,7 @@ bool RepairDesign::performGainBuffering(Net* net,
701725
}
702726
};
703727

704-
// Collect all sinks
728+
// 1. Collect all sinks
705729
std::vector<EnqueuedPin> sinks;
706730

707731
NetConnectedPinIterator* pin_iter = network_->connectedPinIterator(net);
@@ -744,8 +768,8 @@ bool RepairDesign::performGainBuffering(Net* net,
744768
}
745769
std::ranges::sort(sinks, PinRequiredHigher(network_));
746770

747-
// Iterate until we satisfy both the gain condition and max_fanout
748-
// on drvr_pin
771+
// 2. Iterate until we satisfy both the gain condition and max_fanout
772+
// on drvr_pin
749773
while (sinks.size() > max_fanout
750774
|| (has_driver_cin && load > cin * gate_gain)) {
751775
float load_acc = 0;
@@ -766,102 +790,47 @@ bool RepairDesign::performGainBuffering(Net* net,
766790

767791
// Find the smallest buffer satisfying the gain condition on
768792
// its output pin
769-
auto size = buffer_sizes_.begin();
770-
for (; size != buffer_sizes_.end() - 1; size++) {
771-
if (bufferCin(*size) > load_acc / resizer_->buffer_sizing_cap_ratio_) {
793+
auto buf_cell = buffer_sizes_.begin();
794+
for (; buf_cell != buffer_sizes_.end() - 1; buf_cell++) {
795+
if (bufferCin(*buf_cell)
796+
> load_acc / resizer_->buffer_sizing_cap_ratio_) {
772797
break;
773798
}
774799
}
775800

776-
if (bufferCin(*size) >= 0.9f * load_acc) {
801+
if (bufferCin(*buf_cell) >= 0.9f * load_acc) {
777802
// We are getting diminishing returns on inserting a buffer, stop
778803
// the algorithm here (we might have been called with a low gain value)
779804
break;
780805
}
781806

782-
// Get scope of driver, put any new buffers in that scope
783-
sta::Pin* driver_pin = nullptr;
784-
odb::dbModule* driver_parent = db_network_->getNetDriverParentModule(
785-
net, driver_pin, db_network_->hasHierarchy());
786-
odb::dbModInst* parent_mod_inst = driver_parent->getModInst();
787-
Instance* parent;
788-
if (parent_mod_inst) {
789-
parent = db_network_->dbToSta(parent_mod_inst);
790-
} else {
791-
parent = db_network_->topInstance();
792-
}
793-
794-
// note any hierarchical nets.
795-
// and move them to the output of the buffer.
796-
odb::dbModNet* driver_mod_net = db_network_->hierNet(driver_pin);
797-
if (driver_mod_net) {
798-
// only disconnect the modnet, we hook it to the output of the buffer.
799-
db_network_->disconnectPin(driver_pin,
800-
db_network_->dbToSta(driver_mod_net));
801-
}
802-
803-
Net* new_net = db_network_->makeNet(parent);
804-
dbNet* net_db = db_network_->staToDb(net);
805-
dbNet* new_net_db = db_network_->staToDb(new_net);
806-
new_net_db->setSigType(net_db->getSigType());
807-
// TODO: Propagate NDR settings
808-
if (net_db->getNonDefaultRule()) {
809-
new_net_db->setNonDefaultRule(net_db->getNonDefaultRule());
810-
}
811-
812-
const Point drvr_loc = db_network_->location(drvr_pin);
813-
814-
// create instance in driver parent
815-
Instance* inst = resizer_->makeBuffer(*size, "gain", parent, drvr_loc);
816-
817807
LibertyPort *size_in, *size_out;
818-
(*size)->bufferPorts(size_in, size_out);
819-
Pin* buffer_ip_pin = nullptr;
820-
Pin* buffer_op_pin = nullptr;
821-
resizer_->getBufferPins(inst, buffer_ip_pin, buffer_op_pin);
822-
db_network_->connectPin(buffer_ip_pin, net);
823-
824-
// connect the buffer output to the new flat net and any modnet
825-
// Keep the original input net driving the buffer.
826-
// Update the hierarchical net/flat net correspondence because
827-
// the hierarhical net is moved to the output of the buffer.
828-
829-
db_network_->connectPin(
830-
buffer_op_pin, new_net, db_network_->dbToSta(driver_mod_net));
831-
832-
repaired_net = true;
833-
inserted_buffer_count_++;
834-
if (graphics_) {
835-
dbInst* db_inst = db_network_->staToDb(inst);
836-
graphics_->makeBuffer(db_inst);
837-
}
838-
808+
(*buf_cell)->bufferPorts(size_in, size_out);
839809
int max_level = 0;
810+
Pin* new_input_pin = nullptr;
811+
812+
// 3. Insert a new buffer
813+
PinSet group_set(db_network_);
840814
for (auto it = sinks.begin(); it != group_end; it++) {
841-
Pin* sink_pin = it->pin;
842-
LibertyPort* sink_port = network_->libertyPort(it->pin);
843-
Instance* sink_inst = network_->instance(it->pin);
844-
load -= sink_port->capacitance();
815+
group_set.insert(it->pin);
845816
max_level = std::max(it->level, max_level);
846-
847-
odb::dbModNet* sink_mod_net = db_network_->hierNet(sink_pin);
848-
// rewire the sink pin, taking care of both the flat net
849-
// and the hierarchical net. Update the hierarchical net
850-
// flat net correspondence
851-
db_network_->disconnectPin(sink_pin);
852-
db_network_->connectPin(sink_pin,
853-
db_network_->dbToSta(new_net_db),
854-
db_network_->dbToSta(sink_mod_net));
855817
if (it->level == 0) {
856-
Pin* new_pin = network_->findPin(sink_inst, sink_port);
857-
tree_boundary.push_back(graph_->pinLoadVertex(new_pin));
818+
tree_boundary.push_back(graph_->pinLoadVertex(it->pin));
858819
}
859820
}
860821

861-
Pin* new_input_pin = buffer_ip_pin;
822+
Instance* inst = resizer_->insertBufferBeforeLoads(
823+
net, &group_set, *buf_cell, nullptr, "gain");
824+
if (inst) {
825+
repaired_net = true;
826+
inserted_buffer_count_++;
827+
Pin* buffer_op_pin = nullptr;
828+
resizer_->getBufferPins(inst, new_input_pin, buffer_op_pin);
829+
}
862830

863-
Delay buffer_delay
864-
= resizer_->bufferDelay(*size, load_acc, resizer_->tgt_slew_dcalc_ap_);
831+
// 4. New buffer input pin is enqueued as a new sink
832+
Delay buffer_delay = resizer_->bufferDelay(
833+
*buf_cell, load_acc, resizer_->tgt_slew_dcalc_ap_);
865834

866835
auto new_pin = EnqueuedPin{new_input_pin,
867836
(group_end - 1)->required_path,
@@ -873,8 +842,11 @@ bool RepairDesign::performGainBuffering(Net* net,
873842
std::ranges::upper_bound(sinks, new_pin, PinRequiredHigher(network_)),
874843
new_pin);
875844

845+
load -= load_acc;
876846
load += size_in->capacitance();
877847
}
848+
849+
// 5. Incremental timing update
878850
sta_->ensureLevelized();
879851
sta::Level max_level = 0;
880852
for (auto vertex : tree_boundary) {

src/rsz/src/Resizer.cc

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4991,10 +4991,7 @@ odb::dbInst* Resizer::insertBufferBeforeLoads(
49914991
return nullptr;
49924992
}
49934993

4994-
// Make a non-const copy for dbNet API
4995-
std::set<odb::dbObject*> loads_copy = loads;
4996-
4997-
odb::dbInst* buffer_inst = net->insertBufferBeforeLoads(loads_copy,
4994+
odb::dbInst* buffer_inst = net->insertBufferBeforeLoads(loads,
49984995
buffer_cell,
49994996
loc,
50004997
new_buf_base_name,

src/rsz/src/SplitLoadMove.cc

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,27 +54,35 @@ using sta::Slew;
5454
using sta::Vertex;
5555
using sta::VertexOutEdgeIterator;
5656

57+
/// SplitLoadMove: Optimize timing by splitting high-fanout nets
58+
///
59+
/// Purpose: Reduce capacitive load on critical path by dividing loads into
60+
/// two groups based on timing slack.
61+
///
62+
/// @param drvr_path Path including the driver pin
63+
/// @param drvr_index Index of the driver pin in the path
64+
/// @param drvr_slack Slack at the driver pin
65+
/// @param expanded Expanded path containing detailed timing information
66+
/// @param setup_slack_margin Target slack margin for setup timing optimization.
67+
/// Paths with slack less than this margin are
68+
/// considered violating even if their slack is positive.
69+
///
70+
/// Algorithm:
71+
/// 1. Sort all fanout loads by slack margin (timing slack relative to
72+
/// driver)
73+
/// 2. Upper 50% (loads with MORE timing slack) -> driven by new buffer
74+
/// 3. Lower 50% (loads on CRITICAL path) -> driven by original driver
75+
///
76+
/// Result: Critical loads see reduced capacitance, improving setup timing.
77+
///
78+
/// Precondition: Fanout count must exceed split_load_min_fanout_
79+
///
5780
bool SplitLoadMove::doMove(const Path* drvr_path,
5881
int drvr_index,
5982
Slack drvr_slack,
6083
PathExpanded* expanded,
6184
float setup_slack_margin)
6285
{
63-
// SplitLoadMove: Optimize timing by splitting high-fanout nets
64-
//
65-
// Purpose: Reduce capacitive load on critical path by dividing loads into
66-
// two groups based on timing slack.
67-
//
68-
// Algorithm:
69-
// 1. Sort all fanout loads by slack margin (timing slack relative to
70-
// driver)
71-
// 2. Upper 50% (loads with MORE timing slack) → driven by new buffer
72-
// 3. Lower 50% (loads on CRITICAL path) → driven by original driver
73-
//
74-
// Result: Critical loads see reduced capacitance, improving setup timing.
75-
//
76-
// Precondition: Fanout count must exceed split_load_min_fanout_
77-
//
7886
Pin* drvr_pin = drvr_path->pin(this);
7987
Vertex* drvr_vertex = drvr_path->vertex(sta_);
8088
const Path* load_path = expanded->path(drvr_index + 1);

src/rsz/test/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ TESTS = [
3232
"fanin_fanout1",
3333
"gain_buffering1",
3434
"gain_buffering1_hier",
35+
"gain_buffering2",
3536
"gcd_resize",
3637
"insert_buffer",
3738
"insert_buffer_hier",

src/rsz/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ or_integration_tests(
2727
eqy_repair_setup5
2828
fanin_fanout1
2929
gain_buffering1
30+
gain_buffering2
3031
gcd_resize
3132
insert_buffer
3233
insert_buffer_hier

0 commit comments

Comments
 (0)