Skip to content
10 changes: 1 addition & 9 deletions src/mpl/src/clusterEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1227,8 +1227,7 @@ void ClusteringEngine::multilevelAutocluster(Cluster* parent)
if (level_ == 0) {
const int leaf_max_std_cell
= tree_->base_max_std_cell
/ std::pow(tree_->cluster_size_ratio, tree_->max_level - 1)
* (1 + size_tolerance_);
/ std::pow(tree_->cluster_size_ratio, tree_->max_level - 1);
if (parent->getNumStdCell() < leaf_max_std_cell) {
force_split_root = true;
debugPrint(logger_,
Expand Down Expand Up @@ -1293,13 +1292,6 @@ void ClusteringEngine::updateSizeThresholds()
max_std_cell_ = tree_->base_max_std_cell / coarse_factor;
min_std_cell_ = tree_->base_min_std_cell / coarse_factor;

// We define the tolerance to improve the robustness of our hierarchical
// clustering
max_macro_ *= (1 + size_tolerance_);
min_macro_ *= (1 - size_tolerance_);
max_std_cell_ *= (1 + size_tolerance_);
min_std_cell_ *= (1 - size_tolerance_);

if (min_macro_ <= 0) {
min_macro_ = 1;
max_macro_ = min_macro_ * tree_->cluster_size_ratio / 2.0;
Expand Down
1 change: 0 additions & 1 deletion src/mpl/src/clusterEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,6 @@ class ClusteringEngine
int min_macro_{0};
int max_std_cell_{0};
int min_std_cell_{0};
const float size_tolerance_ = 0.1;

// Variables for data flow
DataFlow data_connections_;
Expand Down
245 changes: 35 additions & 210 deletions src/mpl/src/hier_rtlmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,240 +635,65 @@ void HierRTLMP::calculateMacroTilings(Cluster* cluster)
}

std::vector<HardMacro*> hard_macros = cluster->getHardMacros();
TilingSet tilings_set;

if (hard_macros.size() == 1) {
int width = hard_macros[0]->getWidth();
int height = hard_macros[0]->getHeight();

TilingList tilings;
tilings.emplace_back(width, height);
cluster->setTilings(tilings);

debugPrint(logger_,
MPL,
"coarse_shaping",
1,
"{} has only one macro, set tiling according to macro with halo",
cluster->getName());
return;
}
int number_of_macros = hard_macros.size();
int macro_width = hard_macros.front()->getWidth();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

There's a potential for a crash here if cluster->getHardMacros() returns an empty vector, as hard_macros.front() would be undefined behavior. Although the call chain seems to prevent this, adding a defensive check for an empty vector would make the code more robust. For example:

std::vector<HardMacro*> hard_macros = cluster->getHardMacros();
if (hard_macros.empty()) {
  return;
}
int number_of_macros = hard_macros.size();
// ...

int macro_height = hard_macros.front()->getHeight();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does this check that all macros are of a single master? What if they are not?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The clusters are always made of macros of the same size. The function that creates/merges macro clusters checks for their sizes (ClusteringEngine::groupSingleMacroCluster using the size_class variable).


if (cluster->isArrayOfInterconnectedMacros()) {
setTightPackingTilings(cluster);
return;
}
TilingList tilings = generateTilingsForMacroCluster(
macro_width, macro_height, number_of_macros);

if (graphics_) {
graphics_->setCurrentCluster(cluster);
}

// otherwise call simulated annealing to determine tilings
// set the action probabilities
const float action_sum = pos_swap_prob_ + neg_swap_prob_ + double_swap_prob_
+ exchange_swap_prob_;

const odb::Rect outline(
0, 0, tree_->root->getWidth(), tree_->root->getHeight());

// update macros
std::vector<HardMacro> macros;
macros.reserve(hard_macros.size());
for (auto& macro : hard_macros) {
macros.push_back(*macro);
}
int num_perturb_per_step = (macros.size() > num_perturb_per_step_ / 10)
? macros.size()
: num_perturb_per_step_ / 10;
if (cluster->getParent() == nullptr) {
num_perturb_per_step = (macros.size() > num_perturb_per_step_ / 5)
? macros.size()
: num_perturb_per_step_ / 5;
}

// To generate different macro tilings, we vary the outline constraints
// we first vary the outline width while keeping outline_height fixed
// Then we vary the outline height while keeping outline_width fixed
// We vary the outline of cluster to generate different tilings
std::vector<float> vary_factor_list{1.0};
float vary_step = 1.0 / num_runs_; // change the outline by at most halfly
for (int i = 1; i < num_runs_; i++) {
vary_factor_list.push_back(1.0 - i * vary_step);
}
int remaining_runs = num_runs_;
int run_id = 0;
while (remaining_runs > 0) {
HardSAVector sa_batch;
const int run_thread
= graphics_ ? 1 : std::min(remaining_runs, num_threads_);
for (int i = 0; i < run_thread; i++) {
const odb::Rect new_outline(
0, 0, outline.dx() * vary_factor_list[run_id++], outline.dy());
if (graphics_) {
graphics_->setOutline(new_outline);
}
std::unique_ptr<SACoreHardMacro> sa
= std::make_unique<SACoreHardMacro>(tree_.get(),
new_outline,
macros,
shaping_core_weights_,
pos_swap_prob_ / action_sum,
neg_swap_prob_ / action_sum,
double_swap_prob_ / action_sum,
exchange_swap_prob_ / action_sum,
init_prob_,
max_num_step_,
num_perturb_per_step,
random_seed_ + run_id,
graphics_.get(),
logger_,
block_);
sa_batch.push_back(std::move(sa));
}
if (sa_batch.size() == 1) {
runSA<SACoreHardMacro>(sa_batch[0].get());
} else {
// multi threads
std::vector<std::thread> threads;
threads.reserve(sa_batch.size());
for (auto& sa : sa_batch) {
threads.emplace_back(runSA<SACoreHardMacro>, sa.get());
}
for (auto& th : threads) {
th.join();
}
}
// add macro tilings
for (auto& sa : sa_batch) {
if (sa->fitsIn(outline)) {
tilings_set.insert({sa->getWidth(), sa->getHeight()});
}
}
remaining_runs -= run_thread;
}
// change the outline height while keeping outline width fixed
remaining_runs = num_runs_;
run_id = 0;
while (remaining_runs > 0) {
HardSAVector sa_batch;
const int run_thread
= graphics_ ? 1 : std::min(remaining_runs, num_threads_);
for (int i = 0; i < run_thread; i++) {
const odb::Rect new_outline(
0, 0, outline.dx(), outline.dy() * vary_factor_list[run_id++]);
if (graphics_) {
graphics_->setOutline(new_outline);
}
std::unique_ptr<SACoreHardMacro> sa
= std::make_unique<SACoreHardMacro>(tree_.get(),
new_outline,
macros,
shaping_core_weights_,
pos_swap_prob_ / action_sum,
neg_swap_prob_ / action_sum,
double_swap_prob_ / action_sum,
exchange_swap_prob_ / action_sum,
init_prob_,
max_num_step_,
num_perturb_per_step,
random_seed_ + run_id,
graphics_.get(),
logger_,
block_);
sa_batch.push_back(std::move(sa));
}
if (sa_batch.size() == 1) {
runSA<SACoreHardMacro>(sa_batch[0].get());
} else {
// multi threads
std::vector<std::thread> threads;
threads.reserve(sa_batch.size());
for (auto& sa : sa_batch) {
threads.emplace_back(runSA<SACoreHardMacro>, sa.get());
}
for (auto& th : threads) {
th.join();
}
}
// add macro tilings
for (auto& sa : sa_batch) {
if (sa->fitsIn(outline)) {
tilings_set.insert({sa->getWidth(), sa->getHeight()});
}
}
remaining_runs -= run_thread;
if (tilings.empty()) {
tilings = generateTilingsForMacroCluster(
macro_width, macro_height, number_of_macros + 1);
}

if (tilings_set.empty()) {
if (tilings.empty()) {
logger_->error(MPL,
4,
"No valid tilings for hard macro cluster: {}",
cluster->getName());
"Unable to fit cluster {} within outline. Macro height: {}, "
"width: {}, number of macros: {}.",
cluster->getName(),
macro_width,
macro_height,
number_of_macros);
}

TilingList tilings_list(tilings_set.begin(), tilings_set.end());
std::ranges::sort(tilings_list, isAreaSmaller);
cluster->setTilings(tilings);

for (auto& tiling : tilings_list) {
debugPrint(logger_,
MPL,
"coarse_shaping",
2,
"width: {}, height: {}",
tiling.width(),
tiling.height());
}

// we only keep the minimum area tiling since all the macros has the same size
// later this can be relaxed. But this may cause problems because the
// minimizing the wirelength may leave holes near the boundary
TilingList new_tilings_list;
float first_tiling_area = tilings_list.front().area();
for (auto& tiling : tilings_list) {
if (tiling.area() <= first_tiling_area) {
new_tilings_list.push_back(tiling);
if (logger_->debugCheck(MPL, "coarse_shaping", 2)) {
std::string line = "Tiling for hard cluster " + cluster->getName()
+ " with " + std::to_string(number_of_macros)
+ " macros.\n";
for (auto& tiling : tilings) {
line += " < " + std::to_string(tiling.width()) + " , ";
line += std::to_string(tiling.height()) + " > ";
}
line += "\n";
debugPrint(logger_, MPL, "coarse_shaping", 2, "{}", line);
}
tilings_list = std::move(new_tilings_list);
cluster->setTilings(tilings_list);

std::string line = "Tiling for hard cluster " + cluster->getName() + " ";
for (auto& tiling : tilings_list) {
line += " < " + std::to_string(tiling.width()) + " , ";
line += std::to_string(tiling.height()) + " > ";
}
line += "\n";
debugPrint(logger_, MPL, "coarse_shaping", 2, "{}", line);
}

// Used only for arrays of interconnected macros.
void HierRTLMP::setTightPackingTilings(Cluster* macro_array)
TilingList HierRTLMP::generateTilingsForMacroCluster(int macro_width,
int macro_height,
int number_of_macros)
{
TilingList tight_packing_tilings;

int num_macro = macro_array->getNumMacro();
float macro_width = macro_array->getHardMacros().front()->getWidth();
float macro_height = macro_array->getHardMacros().front()->getHeight();

const odb::Rect outline = tree_->root->getBBox();

int columns = 0;
for (int rows = 1; rows < std::sqrt(num_macro) + 1; rows++) {
if (num_macro % rows == 0) {
columns = num_macro / rows;
int rows = 0;
for (int cols = 1; cols <= number_of_macros; cols++) {
if (number_of_macros % cols == 0) {
rows = number_of_macros / cols;
Comment on lines +685 to +686
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we only allow perfect tiling we will have more limited choices (e.g. for an 9 macros you won't try 2x5, only 1x9 and 3x3). Is this intentional?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. The previous code kept only the smallest area tilings too. Doing it this way also avoids notches.


// We don't consider tilings for right angle rotation orientations,
// because they're not allowed in our macro placer.
// Tiling needs to fit inside outline
if (columns * macro_width <= outline.dx()
if (cols * macro_width <= outline.dx()
&& rows * macro_height <= outline.dy()) {
tight_packing_tilings.emplace_back(columns * macro_width,
tight_packing_tilings.emplace_back(cols * macro_width,
rows * macro_height);
}
}
}

macro_array->setTilings(tight_packing_tilings);
return tight_packing_tilings;
}

void HierRTLMP::searchAvailableRegionsForUnconstrainedPins()
Expand Down
4 changes: 3 additions & 1 deletion src/mpl/src/hier_rtlmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ class HierRTLMP
void calculateChildrenTilings(Cluster* parent);
void calculateMacroTilings(Cluster* cluster);
IntervalList computeWidthIntervals(const TilingList& tilings);
void setTightPackingTilings(Cluster* macro_array);
TilingList generateTilingsForMacroCluster(int macro_width,
int macro_height,
int number_of_macros);
void searchAvailableRegionsForUnconstrainedPins();
BoundaryToRegionsMap getBoundaryToBlockedRegionsMap(
const std::vector<odb::Rect>& blocked_regions_for_pins) const;
Expand Down