Skip to content

Commit 585b08f

Browse files
authored
Merge pull request #7999 from jfgava/cts-ndr-strategies
cts: multiple strategies to apply NDR to clock nets
2 parents fe64f81 + 3d2b3fa commit 585b08f

File tree

13 files changed

+209
-28
lines changed

13 files changed

+209
-28
lines changed

src/cts/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ clock_tree_synthesis
6161
[-num_static_layers]
6262
[-sink_clustering_buffer]
6363
[-obstruction_aware]
64-
[-apply_ndr]
64+
[-apply_ndr strategy]
6565
[-insertion_delay]
6666
[-dont_use_dummy_load]
6767
[-sink_buffer_max_cap_derate derate_value]
@@ -92,7 +92,7 @@ clock_tree_synthesis
9292
| `-num_static_layers` | Set the number of static layers. The default value is `0`, and the allowed values are integers `[0, MAX_INT]`. |
9393
| `-sink_clustering_buffer` | Set the sink clustering buffer(s) to be used. |
9494
| `-obstruction_aware` | Enables obstruction-aware buffering such that clock buffers are not placed on top of blockages or hard macros. This option may reduce legalizer displacement, leading to better latency, skew or timing QoR. The default value is `False`, and the allowed values are bool. |
95-
| `-apply_ndr` | Applies 2X spacing non-default rule to all clock nets except leaf-level nets. The default value is `False`. |
95+
| `-apply_ndr` | Applies a 2X spacing non-default rule (NDR) to non-leaf clock nets according to the given strategy. The allowed values are `none`, `root_only` (root level only), `half` (first half of the clock tree levels), and `full` (all non-leaf levels). If this is not specified, the default value is `none`. |
9696
| `-dont_use_dummy_load` | Don't apply dummy buffer or inverter cells at clock tree leaves to balance loads. The default value is `False`. |
9797
| `-sink_buffer_max_cap_derate` | Use this option to control automatic buffer selection. To favor strong(weak) drive strength buffers use a small(large) value. The default value is `0.01`, meaning that buffers are selected by derating max cap limit by 0.01. The value of 1.0 means no derating of max cap limit. |
9898
| `-delay_buffer_derate` | This option balances latencies between macro cells and registers by inserting delay buffers. The default value is `1.0`, meaning all needed delay buffers are inserted. A value of 0.5 means only half of necessary delay buffers are inserted. A value of 0.0 means no insertion of delay buffers. |

src/cts/include/cts/TritonCTS.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,25 @@ class TritonCTS
9898
void buildClockTrees();
9999
void writeDataToDb();
100100

101+
// NDR functions
102+
std::vector<int> getAllClockTreeLevels(Clock& clockNet);
103+
int applyNDRToClockLevels(Clock& clockNet,
104+
odb::dbTechNonDefaultRule* clockNDR,
105+
const std::vector<int>& targetLevels);
106+
107+
int applyNDRToClockLevelRange(Clock& clockNet,
108+
odb::dbTechNonDefaultRule* clockNDR,
109+
int minLevel,
110+
int maxLevel);
111+
int applyNDRToFirstHalfLevels(Clock& clockNet,
112+
odb::dbTechNonDefaultRule* clockNDR);
113+
101114
// db functions
102115
bool masterExists(const std::string& master) const;
103116
void populateTritonCTS();
104117
void writeClockNetsToDb(TreeBuilder* builder,
105118
std::set<odb::dbNet*>& clkLeafNets);
106-
void writeClockNDRsToDb(const std::set<odb::dbNet*>& clkLeafNets);
119+
void writeClockNDRsToDb(TreeBuilder* builder);
107120
void incrementNumClocks() { ++numberOfClocks_; }
108121
void clearNumClocks() { numberOfClocks_ = 0; }
109122
unsigned getNumClocks() const { return numberOfClocks_; }

src/cts/src/Clock.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,16 @@ class ClockSubNet
9494
std::deque<ClockInst*> instances_;
9595
std::unordered_map<ClockInst*, unsigned> mapInstToIdx_;
9696
bool leafLevel_ = false;
97+
int level_ = -1;
98+
odb::dbNet* netObj_ = nullptr;
9799

98100
public:
99101
explicit ClockSubNet(const std::string& name) : name_(name) {}
100102

103+
void setNetObj(odb::dbNet* net) { netObj_ = net; }
104+
odb::dbNet* getNetObj() { return netObj_; }
105+
void setTreeLevel(int level) { level_ = level; }
106+
int getTreeLevel() { return level_; }
101107
void setLeafLevel(bool isLeaf) { leafLevel_ = isLeaf; }
102108
bool isLeafLevel() const { return leafLevel_; }
103109

src/cts/src/CtsOptions.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ namespace cts {
2727
class CtsOptions : public odb::dbBlockCallBackObj
2828
{
2929
public:
30+
// Selects which clock tree levels receive the clock non-default rule (NDR).
enum class NdrStrategy
31+
{
32+
NONE,  // default; writeClockNDRsToDb is not invoked for this value
33+
ROOT_ONLY,  // NDR is applied to tree level 0 only
34+
HALF,  // NDR is applied to the first half of the non-leaf tree levels
35+
FULL  // NDR is applied to every non-leaf tree level
36+
};
37+
3038
enum class MasterType
3139
{
3240
DUMMY,
@@ -222,8 +230,6 @@ class CtsOptions : public odb::dbBlockCallBackObj
222230
stt::SteinerTreeBuilder* getSttBuilder() const { return sttBuilder_; }
223231
void setObstructionAware(bool obs) { obsAware_ = obs; }
224232
bool getObstructionAware() const { return obsAware_; }
225-
void setApplyNDR(bool ndr) { applyNDR_ = ndr; }
226-
bool applyNDR() const { return applyNDR_; }
227233
void enableInsertionDelay(bool insDelay) { insertionDelay_ = insDelay; }
228234
bool insertionDelayEnabled() const { return insertionDelay_; }
229235
void setBufferListInferred(bool inferred) { bufferListInferred_ = inferred; }
@@ -264,6 +270,10 @@ class CtsOptions : public odb::dbBlockCallBackObj
264270
void setRepairClockNets(bool value) { repairClockNets_ = value; }
265271
bool getRepairClockNets() { return repairClockNets_; }
266272

273+
// NDR strategies
274+
void setApplyNDR(NdrStrategy strategy) { ndrStrategy_ = strategy; }
275+
NdrStrategy getApplyNdr() const { return ndrStrategy_; }
276+
267277
private:
268278
std::string clockNets_;
269279
std::string rootBuffer_;
@@ -315,7 +325,6 @@ class CtsOptions : public odb::dbBlockCallBackObj
315325
utl::Logger* logger_ = nullptr;
316326
stt::SteinerTreeBuilder* sttBuilder_ = nullptr;
317327
bool obsAware_ = true;
318-
bool applyNDR_ = false;
319328
bool insertionDelay_ = true;
320329
bool bufferListInferred_ = false;
321330
bool sinkBufferInferred_ = false;
@@ -330,6 +339,7 @@ class CtsOptions : public odb::dbBlockCallBackObj
330339
std::string dummyload_prefix_ = "clkload";
331340
MasterCount dummy_count_;
332341
bool repairClockNets_ = false;
342+
NdrStrategy ndrStrategy_ = NdrStrategy::NONE;
333343
};
334344

335345
} // namespace cts

src/cts/src/HTreeBuilder.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,6 +1849,7 @@ void HTreeBuilder::createClockSubNets()
18491849

18501850
addTreeLevelBuffer(&rootBuffer);
18511851
ClockSubNet& rootClockSubNet = clock_.addSubNet("clknet_0");
1852+
rootClockSubNet.setTreeLevel(0);
18521853
rootClockSubNet.addInst(rootBuffer);
18531854
treeBufLevels_++;
18541855

@@ -1950,6 +1951,11 @@ void HTreeBuilder::createClockSubNets()
19501951
wireSegmentUnit_,
19511952
this);
19521953

1954+
// Set clock tree level the first time only.
1955+
if (builder.getDrivingSubNet()->getTreeLevel() < 0) {
1956+
builder.getDrivingSubNet()->setTreeLevel(levelIdx);
1957+
}
1958+
19531959
if (!options_->getTreeBuffer().empty()) {
19541960
builder.build(options_->getTreeBuffer());
19551961
} else {
@@ -2017,6 +2023,7 @@ void HTreeBuilder::createSingleBufferClockNet()
20172023

20182024
addTreeLevelBuffer(&rootBuffer);
20192025
ClockSubNet& clockSubNet = clock_.addSubNet("clknet_0");
2026+
clockSubNet.setTreeLevel(0);
20202027
clockSubNet.addInst(rootBuffer);
20212028

20222029
clock_.forEachSink([&](ClockInst& inst) { clockSubNet.addInst(inst); });

src/cts/src/TritonCTS.cpp

Lines changed: 119 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -481,8 +481,8 @@ void TritonCTS::writeDataToDb()
481481

482482
for (auto& builder : builders_) {
483483
writeClockNetsToDb(builder.get(), clkLeafNets);
484-
if (options_->applyNDR()) {
485-
writeClockNDRsToDb(clkLeafNets);
484+
if (options_->getApplyNdr() != CtsOptions::NdrStrategy::NONE) {
485+
writeClockNDRsToDb(builder.get());
486486
}
487487
if (options_->dummyLoadEnabled()) {
488488
writeDummyLoadsToDb(builder->getClock(), clkDummies);
@@ -1438,6 +1438,7 @@ void TritonCTS::writeClockNetsToDb(TreeBuilder* builder,
14381438
}
14391439
odb::dbNet* clkSubNet
14401440
= odb::dbNet::create(block_, subNet.getName().c_str());
1441+
subNet.setNetObj(clkSubNet);
14411442

14421443
++numClkNets_;
14431444
clkSubNet->setSigType(odb::dbSigType::CLOCK);
@@ -1557,11 +1558,110 @@ void TritonCTS::writeClockNetsToDb(TreeBuilder* builder,
15571558
CTS, 17, " Max level of the clock tree: {}.", clockNet.getMaxLevel());
15581559
}
15591560

1560-
void TritonCTS::writeClockNDRsToDb(const std::set<odb::dbNet*>& clkLeafNets)
1561+
// Utility function to get all unique clock tree levels
1562+
std::vector<int> TritonCTS::getAllClockTreeLevels(Clock& clockNet)
1563+
{
1564+
std::set<int> uniqueLevels;
1565+
1566+
clockNet.forEachSubNet([&](ClockSubNet& subNet) {
1567+
if (!subNet.isLeafLevel()) {
1568+
uniqueLevels.insert(subNet.getTreeLevel());
1569+
}
1570+
});
1571+
1572+
return std::vector<int>(uniqueLevels.begin(), uniqueLevels.end());
1573+
}
1574+
1575+
// Function to apply NDR to specific clock tree levels and return the number of
1576+
// NDR applied nets
1577+
int TritonCTS::applyNDRToClockLevels(Clock& clockNet,
1578+
odb::dbTechNonDefaultRule* clockNDR,
1579+
const std::vector<int>& targetLevels)
1580+
{
1581+
int ndrAppliedNets = 0;
1582+
1583+
debugPrint(
1584+
logger_, CTS, "clustering", 1, "Applying NDR to clock tree levels: ");
1585+
for (int level : targetLevels) {
1586+
debugPrint(logger_, CTS, "clustering", 1, "{} ", level);
1587+
}
1588+
1589+
// Check if the main clock net (level 0) is in the level list
1590+
if (std::find(targetLevels.begin(), targetLevels.end(), 0)
1591+
!= targetLevels.end()) {
1592+
odb::dbNet* clk_net = clockNet.getNetObj();
1593+
clk_net->setNonDefaultRule(clockNDR);
1594+
ndrAppliedNets++;
1595+
// clang-format off
1596+
debugPrint(logger_, CTS, "clustering", 1,
1597+
"Applied NDR to: {} (level {})", clockNet.getName(), 0);
1598+
// clang-format on
1599+
}
1600+
1601+
// Check clock sub nets list and apply NDR if level matches
1602+
clockNet.forEachSubNet([&](ClockSubNet& subNet) {
1603+
int level = subNet.getTreeLevel();
1604+
if (std::find(targetLevels.begin(), targetLevels.end(), level)
1605+
!= targetLevels.end()) {
1606+
odb::dbNet* net = subNet.getNetObj();
1607+
if (!subNet.isLeafLevel()) {
1608+
net->setNonDefaultRule(clockNDR);
1609+
ndrAppliedNets++;
1610+
std::string net_name = net->getName();
1611+
// clang-format off
1612+
debugPrint(logger_, CTS, "clustering", 1,
1613+
"Applied NDR to: {} (level {})", net_name, level);
1614+
// clang-format on
1615+
}
1616+
}
1617+
});
1618+
1619+
return ndrAppliedNets;
1620+
}
1621+
1622+
// Alternative function to apply NDR to a range of clock tree levels
1623+
int TritonCTS::applyNDRToClockLevelRange(Clock& clockNet,
1624+
odb::dbTechNonDefaultRule* clockNDR,
1625+
const int minLevel,
1626+
const int maxLevel)
1627+
{
1628+
std::vector<int> targetLevels;
1629+
for (int i = minLevel; i <= maxLevel; i++) {
1630+
targetLevels.push_back(i);
1631+
}
1632+
1633+
return applyNDRToClockLevels(clockNet, clockNDR, targetLevels);
1634+
}
1635+
1636+
// Function to apply NDR to the first half of clock tree levels
1637+
int TritonCTS::applyNDRToFirstHalfLevels(Clock& clockNet,
1638+
odb::dbTechNonDefaultRule* clockNDR)
1639+
{
1640+
// Get all unique levels in the design
1641+
const std::vector<int> allLevels = getAllClockTreeLevels(clockNet);
1642+
1643+
// Calculate first half (rounding up if odd number of levels)
1644+
const int halfCount = (allLevels.size() + 1) / 2;
1645+
1646+
// Create vector with first half of levels
1647+
std::vector<int> firstHalfLevels(allLevels.begin(),
1648+
allLevels.begin() + halfCount);
1649+
1650+
// clang-format off
1651+
debugPrint(logger_, CTS, "clustering", 1, "Total clock tree levels found: {}"
1652+
" Applying NDR to first {} levels", allLevels.size(), halfCount);
1653+
// clang-format on
1654+
1655+
// Apply NDR to the first half
1656+
return applyNDRToClockLevels(clockNet, clockNDR, firstHalfLevels);
1657+
}
1658+
1659+
void TritonCTS::writeClockNDRsToDb(TreeBuilder* builder)
15611660
{
15621661
char ruleName[64];
15631662
int ruleIndex = 0;
15641663
odb::dbTechNonDefaultRule* clockNDR;
1664+
Clock& clockNet = builder->getClock();
15651665

15661666
// create a new non-default rule in *block* not tech
15671667
while (ruleIndex >= 0) {
@@ -1595,14 +1695,23 @@ void TritonCTS::writeClockNDRsToDb(const std::set<odb::dbNet*>& clkLeafNets)
15951695
// clang-format on
15961696
}
15971697

1598-
// apply NDR to all non-leaf clock nets
15991698
int clkNets = 0;
1600-
for (odb::dbNet* net : block_->getNets()) {
1601-
if (net->getSigType() == odb::dbSigType::CLOCK
1602-
&& (clkLeafNets.find(net) == clkLeafNets.end())) {
1603-
net->setNonDefaultRule(clockNDR);
1604-
clkNets++;
1605-
}
1699+
1700+
// Apply NDR following the selected strategy (root_only, half, full)
1701+
switch (options_->getApplyNdr()) {
1702+
case CtsOptions::NdrStrategy::ROOT_ONLY:
1703+
clkNets = applyNDRToClockLevels(clockNet, clockNDR, {0});
1704+
break;
1705+
case CtsOptions::NdrStrategy::HALF:
1706+
clkNets = applyNDRToFirstHalfLevels(clockNet, clockNDR);
1707+
break;
1708+
case CtsOptions::NdrStrategy::FULL:
1709+
clkNets = applyNDRToClockLevels(
1710+
clockNet, clockNDR, getAllClockTreeLevels(clockNet));
1711+
break;
1712+
case CtsOptions::NdrStrategy::NONE:
1713+
// Should not be called
1714+
break;
16061715
}
16071716

16081717
logger_->info(CTS,

src/cts/src/TritonCTS.i

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,35 @@ using ord::getTritonCts;
2626
%ignore cts::CtsOptions::setObserver;
2727
%ignore cts::CtsOptions::getObserver;
2828

29+
// Enum: CtsOptions::NdrStrategy
// Type check: the Tcl argument is accepted only when its string form is one
// of the four strategy names, compared case-insensitively.
%typemap(typecheck) CtsOptions::NdrStrategy {
  const char *name = Tcl_GetStringFromObj($input, 0);
  const bool isKnownStrategy = strcasecmp(name, "NONE") == 0
                               || strcasecmp(name, "ROOT_ONLY") == 0
                               || strcasecmp(name, "HALF") == 0
                               || strcasecmp(name, "FULL") == 0;
  $1 = isKnownStrategy ? 1 : 0;
}
44+
45+
// Input conversion: maps the Tcl string (case-insensitive) onto the matching
// NdrStrategy enumerator. Any string other than ROOT_ONLY, HALF or FULL,
// including NONE itself, yields NdrStrategy::NONE.
%typemap(in) CtsOptions::NdrStrategy {
  const char *name = Tcl_GetStringFromObj($input, 0);
  $1 = CtsOptions::NdrStrategy::NONE;
  if (strcasecmp(name, "ROOT_ONLY") == 0) {
    $1 = CtsOptions::NdrStrategy::ROOT_ONLY;
  } else if (strcasecmp(name, "HALF") == 0) {
    $1 = CtsOptions::NdrStrategy::HALF;
  } else if (strcasecmp(name, "FULL") == 0) {
    $1 = CtsOptions::NdrStrategy::FULL;
  }
}
57+
2958
%inline %{
3059

3160
void
@@ -205,9 +234,9 @@ set_obstruction_aware(bool obs)
205234
}
206235

207236
void
208-
set_apply_ndr(bool ndr)
237+
set_apply_ndr(CtsOptions::NdrStrategy strategy)
209238
{
210-
getTritonCts()->getParms()->setApplyNDR(ndr);
239+
getTritonCts()->getParms()->setApplyNDR(strategy);
211240
}
212241

213242
void

src/cts/src/TritonCTS.tcl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ sta::define_cmd_args "clock_tree_synthesis" {[-wire_unit unit]
5656
[-sink_clustering_buffer] \
5757
[-obstruction_aware] \
5858
[-no_obstruction_aware] \
59-
[-apply_ndr] \
59+
[-apply_ndr strategy] \
6060
[-sink_buffer_max_cap_derate] \
6161
[-dont_use_dummy_load] \
6262
[-delay_buffer_derate] \
@@ -74,9 +74,10 @@ proc clock_tree_synthesis { args } {
7474
-clustering_unbalance_ratio -sink_clustering_max_diameter \
7575
-macro_clustering_size -macro_clustering_max_diameter \
7676
-sink_clustering_levels -tree_buf \
77+
-apply_ndr \
7778
-sink_buffer_max_cap_derate -delay_buffer_derate -library} \
7879
flags {-post_cts_disable -sink_clustering_enable -balance_levels \
79-
-obstruction_aware -no_obstruction_aware -apply_ndr \
80+
-obstruction_aware -no_obstruction_aware \
8081
-dont_use_dummy_load -repair_clock_nets -no_insertion_delay
8182
} ;# checker off
8283

@@ -213,7 +214,10 @@ proc clock_tree_synthesis { args } {
213214
cts::set_dummy_load true
214215
}
215216

216-
cts::set_apply_ndr [info exists flags(-apply_ndr)]
217+
if { [info exists keys(-apply_ndr)] } {
218+
set strategy $keys(-apply_ndr)
219+
cts::set_apply_ndr $strategy
220+
}
217221

218222
if { [info exists flags(-repair_clock_nets)] } {
219223
cts::set_repair_clock_nets true

0 commit comments

Comments
 (0)