Skip to content

Commit 3307a41

Browse files
authored
Heuristic Improvements: balance between generation and improvement heuristics (#382)
This PR changes the heuristic structure by creating a natural balance between generation and improvement. The FP/FJ loop now adds solutions to the population, and only once we have enough diverse solutions do we exit the loop and execute the population improvement. The diversity is increased to `sqrt(n_integers)`. If stagnation is detected in the FP/FJ loop, the recombiners are run between the current best and all other solutions in the current population, and then the loop continues. The bounds prop rounding in the context of FP is also improved. When the dual simplex solution is set, PDLP is now warm-started with both primal and dual solutions. The default tolerances are now 1e-6 absolute and 1e-12 relative. This PR includes bug fixes for: - Appearance of inf/nan in the `z` vector in dual simplex phase2. - Invalid launch dimensions on FJ and hash kernels. - Timer diff and function time limit issues when the solver is run with an unlimited time limit. Benchmark results for a 10-minute run on H100: - Main branch: 207 feasible solutions and average gap: '28.54', 3 unfinished/crashed - This PR: 213 feasible and average gap: '23.11', 1 unfinished/crashed. (The PR didn't have any crashes before merging with the main branch.) closes #142 closes #374 closes #218 Authors: - Akif ÇÖRDÜK (https://github.com/akifcorduk) Approvers: - Ramakrishnap (https://github.com/rgsl888prabhu) - Alice Boucher (https://github.com/aliceb-nv) URL: #382
1 parent 6e2e622 commit 3307a41

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+522
-549
lines changed

benchmarks/linear_programming/cuopt/mip_test_instances.hpp

Lines changed: 8 additions & 172 deletions
Original file line numberDiff line numberDiff line change
@@ -17,175 +17,11 @@
1717
#pragma once
1818
#include <string>
1919
#include <vector>
20-
std::vector<std::string> instances = {"30n20b8.mps",
21-
"50v-10.mps",
22-
"CMS750_4.mps",
23-
"academictimetablesmall.mps",
24-
"air05.mps",
25-
"app1-1.mps",
26-
"app1-2.mps",
27-
"assign1-5-8.mps",
28-
"atlanta-ip.mps",
29-
"bab2.mps",
30-
"bab6.mps",
31-
"beasleyC3.mps",
32-
"binkar10_1.mps",
33-
"blp-ar98.mps",
34-
"blp-ic98.mps",
35-
"bppc4-08.mps",
36-
"brazil3.mps",
37-
"cmflsp50-24-8-8.mps",
38-
"co-100.mps",
39-
"cod105.mps",
40-
"comp07-2idx.mps",
41-
"comp21-2idx.mps",
42-
"csched007.mps",
43-
"csched008.mps",
44-
"cvs16r128-89.mps",
45-
"dano3_3.mps",
46-
"decomp2.mps",
47-
"drayage-100-23.mps",
48-
"drayage-25-23.mps",
49-
"eil33-2.mps",
50-
"eilA101-2.mps",
51-
"exp-1-500-5-5.mps",
52-
"fast0507.mps",
53-
"fastxgemm-n2r6s0t2.mps",
54-
"fiball.mps",
55-
"gen-ip002.mps",
56-
"germanrr.mps",
57-
"glass4.mps",
58-
"graph20-20-1rand.mps",
59-
"graphdraw-domain.mps",
60-
"h80x6320d.mps",
61-
"highschool1-aigio.mps",
62-
"hypothyroid-k1.mps",
63-
"icir97_tension.mps",
64-
"irish-electricity.mps",
65-
"istanbul-no-cutoff.mps",
66-
"k1mushroom.mps",
67-
"lectsched-5-obj.mps",
68-
"leo1.mps",
69-
"leo2.mps",
70-
"lotsize.mps",
71-
"mad.mps",
72-
"map10.mps",
73-
"map16715-04.mps",
74-
"markshare2.mps",
75-
"markshare_4_0.mps",
76-
"mas74.mps",
77-
"mc11.mps",
78-
"mcsched.mps",
79-
"mik-250-20-75-4.mps",
80-
"momentum1.mps",
81-
"mushroom-best.mps",
82-
"mzzv11.mps",
83-
"mzzv42z.mps",
84-
"n2seq36q.mps",
85-
"n3div36.mps",
86-
"neos-1171448.mps",
87-
"neos-1171737.mps",
88-
"neos-1354092.mps",
89-
"neos-1445765.mps",
90-
"neos-1456979.mps",
91-
"neos-1582420.mps",
92-
"neos-2657525-crna.mps",
93-
"neos-2746589-doon.mps",
94-
"neos-3024952-loue.mps",
95-
"neos-3046615-murg.mps",
96-
"neos-3216931-puriri.mps",
97-
"neos-3402294-bobin.mps",
98-
"neos-3656078-kumeu.mps",
99-
"neos-3754480-nidda.mps",
100-
"neos-4300652-rahue.mps",
101-
"neos-4338804-snowy.mps",
102-
"neos-4387871-tavua.mps",
103-
"neos-4413714-turia.mps",
104-
"neos-4532248-waihi.mps",
105-
"neos-4722843-widden.mps",
106-
"neos-4738912-atrato.mps",
107-
"neos-4763324-toguru.mps",
108-
"neos-4954672-berkel.mps",
109-
"neos-5049753-cuanza.mps",
110-
"neos-5093327-huahum.mps",
111-
"neos-5107597-kakapo.mps",
112-
"neos-5114902-kasavu.mps",
113-
"neos-5188808-nattai.mps",
114-
"neos-5195221-niemur.mps",
115-
"neos-662469.mps",
116-
"neos-787933.mps",
117-
"neos-848589.mps",
118-
"neos-860300.mps",
119-
"neos-911970.mps",
120-
"neos-933966.mps",
121-
"neos-950242.mps",
122-
"neos17.mps",
123-
"neos5.mps",
124-
"net12.mps",
125-
"netdiversion.mps",
126-
"nexp-150-20-8-5.mps",
127-
"ns1644855.mps",
128-
"ns1760995.mps",
129-
"ns1830653.mps",
130-
"nursesched-medium-hint03.mps",
131-
"nursesched-sprint02.mps",
132-
"opm2-z10-s4.mps",
133-
"pg.mps",
134-
"physiciansched3-3.mps",
135-
"piperout-08.mps",
136-
"piperout-27.mps",
137-
"pk1.mps",
138-
"qap10.mps",
139-
"radiationm18-12-05.mps",
140-
"radiationm40-10-02.mps",
141-
"rail01.mps",
142-
"rail02.mps",
143-
"rail507.mps",
144-
"ran14x18-disj-8.mps",
145-
"rmatr100-p10.mps",
146-
"rmatr200-p5.mps",
147-
"rocI-4-11.mps",
148-
"rocII-5-11.mps",
149-
"rococoB10-011000.mps",
150-
"rococoC10-001000.mps",
151-
"roi2alpha3n4.mps",
152-
"roi5alpha10n8.mps",
153-
"roll3000.mps",
154-
"s100.mps",
155-
"s250r10.mps",
156-
"satellites2-40.mps",
157-
"satellites2-60-fs.mps",
158-
"savsched1.mps",
159-
"sct2.mps",
160-
"seymour.mps",
161-
"seymour1.mps",
162-
"sing326.mps",
163-
"sing44.mps",
164-
"sorrell3.mps",
165-
"sp97ar.mps",
166-
"sp98ar.mps",
167-
"splice1k1.mps",
168-
"square41.mps",
169-
"square47.mps",
170-
"supportcase10.mps",
171-
"supportcase12.mps",
172-
"supportcase18.mps",
173-
"supportcase26.mps",
174-
"supportcase33.mps",
175-
"supportcase40.mps",
176-
"supportcase42.mps",
177-
"supportcase6.mps",
178-
"supportcase7.mps",
179-
"swath1.mps",
180-
"swath3.mps",
181-
"tbfp-network.mps",
182-
"thor50dday.mps",
183-
"timtab1.mps",
184-
"tr12-30.mps",
185-
"traininstance2.mps",
186-
"traininstance6.mps",
187-
"trento1.mps",
188-
"uccase12.mps",
189-
"uct-subprob.mps",
190-
"unitcal_7.mps",
191-
"var-smallemery-m6j6.mps"};
20+
std::vector<std::string> instances = {"supportcase26_presolved.mps",
21+
"supportcase26_presolved_2.mps",
22+
"supportcase26_presolved_3.mps",
23+
"supportcase26_presolved_4.mps",
24+
"supportcase26_presolved_5.mps",
25+
"supportcase26_presolved_6.mps",
26+
"supportcase26_presolved_7.mps",
27+
"supportcase26_presolved_8.mps"};

benchmarks/linear_programming/cuopt/run_mip.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ int run_single_file(std::string file_path,
210210
settings.log_to_console = log_to_console;
211211
settings.tolerances.relative_tolerance = 1e-12;
212212
settings.tolerances.absolute_tolerance = 1e-6;
213+
settings.presolve = true;
213214
cuopt::linear_programming::benchmark_info_t benchmark_info;
214215
settings.benchmark_info_ptr = &benchmark_info;
215216
auto start_run_solver = std::chrono::high_resolution_clock::now();

cpp/include/cuopt/linear_programming/mip/solver_settings.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ class mip_solver_settings_t {
7272

7373
struct tolerances_t {
7474
f_t presolve_absolute_tolerance = 1.0e-6;
75-
f_t absolute_tolerance = 1.0e-4;
76-
f_t relative_tolerance = 1.0e-6;
75+
f_t absolute_tolerance = 1.0e-6;
76+
f_t relative_tolerance = 1.0e-12;
7777
f_t integrality_tolerance = 1.0e-5;
7878
f_t absolute_mip_gap = 1.0e-10;
7979
f_t relative_mip_gap = 1.0e-4;

cpp/include/cuopt/logger.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ inline rapids_logger::logger& default_logger()
7878
logger_.set_pattern(default_pattern());
7979
#endif
8080
logger_.set_level(default_level());
81-
logger_.flush_on(rapids_logger::level_enum::info);
81+
logger_.flush_on(rapids_logger::level_enum::debug);
8282

8383
return logger_;
8484
}();
@@ -100,7 +100,7 @@ inline void reset_default_logger()
100100
default_logger().set_pattern(default_pattern());
101101
#endif
102102
default_logger().set_level(default_level());
103-
default_logger().flush_on(rapids_logger::level_enum::info);
103+
default_logger().flush_on(rapids_logger::level_enum::debug);
104104
}
105105

106106
} // namespace cuopt

cpp/src/dual_simplex/bound_flipping_ratio_test.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ i_t bound_flipping_ratio_test_t<i_t, f_t>::single_pass(i_t start,
9393
}
9494
step_length = min_val;
9595
nonbasic_entering = candidate;
96+
// this should be temporary, find root causes where the candidate is not filled
97+
if (nonbasic_entering == -1) {
98+
// -1,-2 and -3 are reserved for other things
99+
return -4;
100+
}
96101
const i_t j = entering_index = nonbasic_list_[nonbasic_entering];
97102

98103
constexpr bool verbose = false;
@@ -137,6 +142,7 @@ i_t bound_flipping_ratio_test_t<i_t, f_t>::compute_step_length(f_t& step_length,
137142

138143
i_t k_idx = single_pass(
139144
0, num_breakpoints, indicies, ratios, slope, step_length, nonbasic_entering, entering_index);
145+
if (k_idx == -4) { return -4; }
140146
bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0;
141147
if (!continue_search) {
142148
if constexpr (0) {

cpp/src/dual_simplex/branch_and_bound.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -409,9 +409,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
409409
assert(root_vstatus.size() == original_lp.num_cols);
410410
if (root_status == lp_status_t::INFEASIBLE) {
411411
settings.log.printf("MIP Infeasible\n");
412-
if (settings.heuristic_preemption_callback != nullptr) {
413-
settings.heuristic_preemption_callback();
414-
}
412+
// FIXME: rarely dual simplex detects infeasible whereas it is feasible.
413+
// to add a small safety net, check if there is a primal solution already.
414+
// Uncomment this if the issue with cost266-UUE is resolved
415+
// if (settings.heuristic_preemption_callback != nullptr) {
416+
// settings.heuristic_preemption_callback();
417+
// }
415418
return mip_status_t::INFEASIBLE;
416419
}
417420
if (root_status == lp_status_t::UNBOUNDED) {
@@ -434,8 +437,16 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
434437
if (settings.set_simplex_solution_callback != nullptr) {
435438
std::vector<f_t> original_x;
436439
uncrush_primal_solution(original_problem, original_lp, root_relax_soln.x, original_x);
437-
settings.set_simplex_solution_callback(original_x,
438-
compute_user_objective(original_lp, root_objective));
440+
std::vector<f_t> original_dual;
441+
std::vector<f_t> original_z;
442+
uncrush_dual_solution(original_problem,
443+
original_lp,
444+
root_relax_soln.y,
445+
root_relax_soln.z,
446+
original_dual,
447+
original_z);
448+
settings.set_simplex_solution_callback(
449+
original_x, original_dual, compute_user_objective(original_lp, root_objective));
439450
}
440451
mutex_lower.lock();
441452
f_t lower_bound = lower_bound_ = root_objective;

cpp/src/dual_simplex/phase2.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,13 +1495,13 @@ void compute_delta_y(const basis_update_mpf_t<i_t, f_t>& ft,
14951495
}
14961496

14971497
template <typename i_t, typename f_t>
1498-
void update_dual_variables(const sparse_vector_t<i_t, f_t>& delta_y_sparse,
1499-
const std::vector<i_t>& delta_z_indices,
1500-
const std::vector<f_t>& delta_z,
1501-
f_t step_length,
1502-
i_t leaving_index,
1503-
std::vector<f_t>& y,
1504-
std::vector<f_t>& z)
1498+
i_t update_dual_variables(const sparse_vector_t<i_t, f_t>& delta_y_sparse,
1499+
const std::vector<i_t>& delta_z_indices,
1500+
const std::vector<f_t>& delta_z,
1501+
f_t step_length,
1502+
i_t leaving_index,
1503+
std::vector<f_t>& y,
1504+
std::vector<f_t>& z)
15051505
{
15061506
// Update dual variables
15071507
// y <- y + steplength * delta_y
@@ -1517,6 +1517,7 @@ void update_dual_variables(const sparse_vector_t<i_t, f_t>& delta_y_sparse,
15171517
z[j] += step_length * delta_z[j];
15181518
}
15191519
z[leaving_index] += step_length * delta_z[leaving_index];
1520+
return 0;
15201521
}
15211522

15221523
template <typename i_t, typename f_t>
@@ -2514,6 +2515,10 @@ dual::status_t dual_phase2(i_t phase,
25142515
delta_z_indices,
25152516
nonbasic_mark);
25162517
entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index);
2518+
if (entering_index == -4) {
2519+
settings.log.printf("Numerical issues encountered in ratio test.\n");
2520+
return dual::status_t::NUMERICAL;
2521+
}
25172522
timers.bfrt_time += timers.stop_timer();
25182523
} else {
25192524
entering_index = phase2::phase2_ratio_test(
@@ -2663,8 +2668,12 @@ dual::status_t dual_phase2(i_t phase,
26632668
// Update dual variables
26642669
// y <- y + steplength * delta_y
26652670
// z <- z + steplength * delta_z
2666-
phase2::update_dual_variables(
2671+
i_t update_dual_variables_status = phase2::update_dual_variables(
26672672
delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z);
2673+
if (update_dual_variables_status == -1) {
2674+
settings.log.printf("Numerical issues encountered in update_dual_variables.\n");
2675+
return dual::status_t::NUMERICAL;
2676+
}
26682677
timers.vector_time += timers.stop_timer();
26692678

26702679
#ifdef COMPUTE_DUAL_RESIDUAL

cpp/src/dual_simplex/presolve.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,7 @@ void uncrush_dual_solution(const user_problem_t<i_t, f_t>& user_problem,
11341134
std::vector<f_t>& user_y,
11351135
std::vector<f_t>& user_z)
11361136
{
1137+
user_y.resize(user_problem.num_rows);
11371138
// Reduced costs are uncrushed just like the primal solution
11381139
uncrush_primal_solution(user_problem, problem, z, user_z);
11391140

cpp/src/dual_simplex/simplex_solver_settings.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ struct simplex_solver_settings_t {
109109
i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node
110110
std::function<void(std::vector<f_t>&, f_t)> solution_callback;
111111
std::function<void()> heuristic_preemption_callback;
112-
std::function<void(std::vector<f_t>&, f_t)> set_simplex_solution_callback;
112+
std::function<void(std::vector<f_t>&, std::vector<f_t>&, f_t)> set_simplex_solution_callback;
113113
mutable logger_t log;
114114
std::atomic<i_t>* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should
115115
// continue, 1 if solver should halt

cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,17 @@ pdlp_initial_scaling_strategy_t<i_t, f_t>::pdlp_initial_scaling_strategy_t(
3939
problem_t<i_t, f_t>& op_problem_scaled,
4040
i_t number_of_ruiz_iterations,
4141
f_t alpha,
42-
pdhg_solver_t<i_t, f_t>& pdhg_solver,
4342
rmm::device_uvector<f_t>& A_T,
4443
rmm::device_uvector<i_t>& A_T_offsets,
4544
rmm::device_uvector<i_t>& A_T_indices,
45+
pdhg_solver_t<i_t, f_t>* pdhg_solver_ptr,
4646
bool running_mip)
4747
: handle_ptr_(handle_ptr),
4848
stream_view_(handle_ptr_->get_stream()),
4949
primal_size_h_(op_problem_scaled.n_variables),
5050
dual_size_h_(op_problem_scaled.n_constraints),
5151
op_problem_scaled_(op_problem_scaled),
52-
pdhg_solver_(pdhg_solver),
52+
pdhg_solver_ptr_(pdhg_solver_ptr),
5353
A_T_(A_T),
5454
A_T_offsets_(A_T_offsets),
5555
A_T_indices_(A_T_indices),
@@ -398,7 +398,7 @@ void pdlp_initial_scaling_strategy_t<i_t, f_t>::scale_problem()
398398

399399
op_problem_scaled_.is_scaled_ = true;
400400
if (!running_mip_) {
401-
scale_solutions(pdhg_solver_.get_primal_solution(), pdhg_solver_.get_dual_solution());
401+
scale_solutions(pdhg_solver_ptr_->get_primal_solution(), pdhg_solver_ptr_->get_dual_solution());
402402
}
403403
}
404404

0 commit comments

Comments
 (0)