Skip to content

Commit dddf42d

Browse files
authored
CI crash fixes (#691)
## Summary by CodeRabbit

## Release Notes

* **Performance**
  * Optimized branch-and-bound algorithm with improved search termination conditions
* **Improvements**
  * Enhanced concurrency control mechanisms across solver components
  * Improved logger initialization and lifecycle management for better resource handling

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

Authors:
- Alice Boucher (https://github.com/aliceb-nv)

Approvers:
- Nicolas Blin (https://github.com/Kh4ster)

URL: #691
1 parent ba48131 commit dddf42d

File tree

9 files changed

+47
-20
lines changed

9 files changed

+47
-20
lines changed

cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ class pdlp_solver_settings_t {
212212
method_t method{method_t::Concurrent};
213213
bool inside_mip{false};
214214
// For concurrent termination
215-
volatile int* concurrent_halt{nullptr};
215+
std::atomic<int>* concurrent_halt{nullptr};
216216
static constexpr f_t minimal_absolute_tolerance = 1.0e-12;
217217

218218
private:

cpp/src/dual_simplex/branch_and_bound.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,6 +1135,7 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(const csr_matrix_t<i_t, f_t>& A
11351135
if (get_upper_bound() < start_node->node.lower_bound) { continue; }
11361136

11371137
bool recompute_bounds_and_basis = true;
1138+
i_t nodes_explored = 0;
11381139
search_tree_t<i_t, f_t> subtree(std::move(start_node->node));
11391140
std::deque<mip_node_t<i_t, f_t>*> stack;
11401141
stack.push_front(&subtree.root);
@@ -1152,6 +1153,8 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(const csr_matrix_t<i_t, f_t>& A
11521153

11531154
if (toc(exploration_stats_.start_time) > settings_.time_limit) { return; }
11541155

1156+
if (nodes_explored >= 1000) { break; }
1157+
11551158
node_solve_info_t status = solve_node(node_ptr,
11561159
subtree,
11571160
leaf_problem,
@@ -1165,6 +1168,8 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(const csr_matrix_t<i_t, f_t>& A
11651168
start_node->upper,
11661169
log);
11671170

1171+
nodes_explored++;
1172+
11681173
recompute_bounds_and_basis = !has_children(status);
11691174

11701175
if (status == node_solve_info_t::TIME_LIMIT) {

cpp/src/dual_simplex/branch_and_bound.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ class branch_and_bound_t {
113113
f_t get_lower_bound();
114114
i_t get_heap_size();
115115
bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; }
116-
volatile int* get_root_concurrent_halt() { return &root_concurrent_halt_; }
116+
std::atomic<int>* get_root_concurrent_halt() { return &root_concurrent_halt_; }
117117
void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; }
118118
lp_status_t solve_root_relaxation(simplex_solver_settings_t<i_t, f_t> const& lp_settings);
119119

@@ -170,7 +170,7 @@ class branch_and_bound_t {
170170
std::vector<f_t> edge_norms_;
171171
std::atomic<bool> root_crossover_solution_set_{false};
172172
bool enable_concurrent_lp_root_solve_{false};
173-
volatile int root_concurrent_halt_{0};
173+
std::atomic<int> root_concurrent_halt_{0};
174174

175175
// Pseudocosts
176176
pseudo_costs_t<i_t, f_t> pc_;

cpp/src/dual_simplex/simplex_solver_settings.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ struct simplex_solver_settings_t {
145145
std::function<void()> heuristic_preemption_callback;
146146
std::function<void(std::vector<f_t>&, std::vector<f_t>&, f_t)> set_simplex_solution_callback;
147147
mutable logger_t log;
148-
volatile int* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should
149-
// continue, 1 if solver should halt
148+
std::atomic<int>* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should
149+
// continue, 1 if solver should halt
150150
};
151151

152152
} // namespace cuopt::linear_programming::dual_simplex

cpp/src/linear_programming/solve.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ void setup_device_symbols(rmm::cuda_stream_view stream_view)
306306
detail::set_pdlp_hyper_parameters(stream_view);
307307
}
308308

309-
volatile int global_concurrent_halt;
309+
std::atomic<int> global_concurrent_halt{0};
310310

311311
template <typename i_t, typename f_t>
312312
optimization_problem_solution_t<i_t, f_t> convert_dual_simplex_sol(

cpp/src/mip/diversity/diversity_manager.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class diversity_manager_t {
9595
// mutex for the simplex solution update
9696
std::mutex relaxed_solution_mutex;
9797
// atomic for signalling pdlp to stop
98-
volatile int global_concurrent_halt{0};
98+
std::atomic<int> global_concurrent_halt{0};
9999

100100
rins_t<i_t, f_t> rins;
101101

cpp/src/mip/relaxed_lp/relaxed_lp.cuh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
namespace cuopt::linear_programming::detail {
1818

1919
struct relaxed_lp_settings_t {
20-
double tolerance = 1e-4;
21-
double time_limit = 1.0;
22-
bool check_infeasibility = true;
23-
bool return_first_feasible = false;
24-
bool save_state = true;
25-
bool per_constraint_residual = true;
26-
bool has_initial_primal = true;
27-
volatile int* concurrent_halt = nullptr;
20+
double tolerance = 1e-4;
21+
double time_limit = 1.0;
22+
bool check_infeasibility = true;
23+
bool return_first_feasible = false;
24+
bool save_state = true;
25+
bool per_constraint_residual = true;
26+
bool has_initial_primal = true;
27+
std::atomic<int>* concurrent_halt = nullptr;
2828
};
2929

3030
template <typename i_t, typename f_t>

cpp/src/utilities/logger.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,26 @@ void reset_default_logger()
137137
default_logger().flush_on(rapids_logger::level_enum::debug);
138138
}
139139

140+
// Guard object whose destructor resets the logger
141+
struct logger_config_guard {
142+
~logger_config_guard() { cuopt::reset_default_logger(); }
143+
};
144+
145+
// Weak reference to detect if any init_logger_t instance is still alive
146+
static std::weak_ptr<logger_config_guard> g_active_guard;
147+
static std::mutex g_guard_mutex;
148+
140149
init_logger_t::init_logger_t(std::string log_file, bool log_to_console)
141150
{
142-
// until this function is called, the default sink is the buffer sink
151+
std::lock_guard<std::mutex> lock(g_guard_mutex);
152+
153+
auto existing_guard = g_active_guard.lock();
154+
if (existing_guard) {
155+
// Reuse existing configuration, just hold a reference to keep it alive
156+
guard_ = existing_guard;
157+
return;
158+
}
159+
143160
cuopt::default_logger().sinks().clear();
144161

145162
// re-initialize sinks
@@ -164,8 +181,11 @@ init_logger_t::init_logger_t(std::string log_file, bool log_to_console)
164181
for (const auto& entry : buffered_messages) {
165182
cuopt::default_logger().log(entry.level, entry.msg.c_str());
166183
}
167-
}
168184

169-
init_logger_t::~init_logger_t() { cuopt::reset_default_logger(); }
185+
// Create guard and store weak reference for future instances to find
186+
auto guard = std::make_shared<logger_config_guard>();
187+
g_active_guard = guard;
188+
guard_ = guard;
189+
}
170190

171191
} // namespace cuopt

cpp/src/utilities/logger.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,13 @@ rapids_logger::logger& default_logger();
3333
*/
3434
void reset_default_logger();
3535

36+
// Ref-counted logger initializer
3637
class init_logger_t {
38+
// Using shared_ptr for ref-counting
39+
std::shared_ptr<void> guard_;
40+
3741
public:
3842
init_logger_t(std::string log_file, bool log_to_console);
39-
40-
~init_logger_t();
4143
};
4244

4345
} // namespace cuopt

0 commit comments

Comments
 (0)