Skip to content

Commit 8467498

Browse files
Optimize segmented
1 parent 4891d85 commit 8467498

File tree

1 file changed

+69
-43
lines changed

1 file changed

+69
-43
lines changed

pyqrackising/kernels.cl

Lines changed: 69 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -504,26 +504,24 @@ __kernel void calculate_cut_segmented(
504504
}
505505

506506
real1 single_bit_flip_worker_segmented(__constant uint* theta, __global const real1** G_m, const int n, const int segment_size, const bool is_spin_glass, const int k) {
507+
const size_t k_offset = k * (size_t)n;
508+
const bool k_bit = !get_const_bit(theta, k);
507509
real1 energy = ZERO_R1;
508-
const size_t n_st = (size_t)n;
509-
for (int u = 0; u < n; ++u) {
510-
const size_t u_offset = u * n_st;
511-
bool u_bit = get_const_bit(theta, u);
512-
if (u == k) {
513-
u_bit = !u_bit;
510+
for (int v = 0; v < k; ++v) {
511+
const bool v_bit = get_const_bit(theta, v);
512+
real1 val = get_G_m(G_m, k_offset + v, segment_size);
513+
if (is_spin_glass) {
514+
val *= 2;
514515
}
515-
for (int v = u + 1; v < n; ++v) {
516-
const real1 val = get_G_m(G_m, u_offset + v, segment_size);
517-
bool v_bit = get_const_bit(theta, v);
518-
if (v == k) {
519-
v_bit = ! v_bit;
520-
}
521-
if (u_bit != v_bit) {
522-
energy += val;
523-
} else if (is_spin_glass) {
524-
energy -= val;
525-
}
516+
energy += (k_bit != v_bit) ? val : -val;
517+
}
518+
for (int v = k + 1; v < n; ++v) {
519+
const bool v_bit = get_const_bit(theta, v);
520+
real1 val = get_G_m(G_m, k_offset + v, segment_size);
521+
if (is_spin_glass) {
522+
val *= 2;
526523
}
524+
energy += (k_bit != v_bit) ? val : -val;
527525
}
528526

529527
return energy;
@@ -564,27 +562,55 @@ __kernel void single_bit_flips_segmented(
564562
reduce_energy_index(best_energy, best_i, loc_energy, loc_index, max_energy_ptr, max_index_ptr);
565563
}
566564

567-
real1 double_bit_flip_worker_segmented(__constant uint* theta, __global const real1** G_m, const int n, const int segment_size, const bool is_spin_glass, const int k, const int l) {
565+
real1 double_bit_flip_worker_segmented(__constant uint* theta, __global const real1** G_m, const int n, const int segment_size, const bool is_spin_glass, int k, int l) {
566+
if (l < k) {
567+
int t = k;
568+
k = l;
569+
l = t;
570+
}
571+
const size_t k_offset = k * (size_t)n;
572+
const bool k_bit = !get_const_bit(theta, k);
573+
const size_t l_offset = l * (size_t)n;
574+
const bool l_bit = !get_const_bit(theta, l);
568575
real1 energy = ZERO_R1;
569-
const size_t n_st = (size_t)n;
570-
for (int u = 0; u < n; ++u) {
571-
const size_t u_offset = u * n_st;
572-
bool u_bit = get_const_bit(theta, u);
573-
if ((u == k) || (u == l)) {
574-
u_bit = !u_bit;
576+
for (int v = 0; v < k; ++v) {
577+
const bool v_bit = get_const_bit(theta, v);
578+
real1 val = get_G_m(G_m, k_offset + v, segment_size);
579+
if (is_spin_glass) {
580+
val *= 2;
575581
}
576-
for (int v = u + 1; v < n; ++v) {
577-
const real1 val = get_G_m(G_m, u_offset + v, segment_size);
578-
bool v_bit = get_const_bit(theta, v);
579-
if ((v == k) || (v == l)) {
580-
v_bit = ! v_bit;
581-
}
582-
if (u_bit != v_bit) {
583-
energy += val;
584-
} else if (is_spin_glass) {
585-
energy -= val;
586-
}
582+
energy += (k_bit != v_bit) ? val : -val;
583+
val = get_G_m(G_m, l_offset + v, segment_size);
584+
if (is_spin_glass) {
585+
val *= 2;
586+
}
587+
energy += (l_bit != v_bit) ? val : -val;
588+
}
589+
for (int v = k + 1; v < l; ++v) {
590+
const bool v_bit = get_const_bit(theta, v);
591+
real1 val = get_G_m(G_m, k_offset + v, segment_size);
592+
if (is_spin_glass) {
593+
val *= 2;
587594
}
595+
energy += (k_bit != v_bit) ? val : -val;
596+
val = get_G_m(G_m, l_offset + v, segment_size);
597+
if (is_spin_glass) {
598+
val *= 2;
599+
}
600+
energy += (l_bit != v_bit) ? val : -val;
601+
}
602+
for (int v = l + 1; v < n; ++v) {
603+
const bool v_bit = get_const_bit(theta, v);
604+
real1 val = get_G_m(G_m, k_offset + v, segment_size);
605+
if (is_spin_glass) {
606+
val *= 2;
607+
}
608+
energy += (k_bit != v_bit) ? val : -val;
609+
val = get_G_m(G_m, l_offset + v, segment_size);
610+
if (is_spin_glass) {
611+
val *= 2;
612+
}
613+
energy += (l_bit != v_bit) ? val : -val;
588614
}
589615

590616
return energy;
@@ -959,7 +985,7 @@ __kernel void gray(
959985
real1 best_energy = ZERO_R1;
960986
for (uint u = 0; u < n; u++) {
961987
const size_t u_offset = u * n;
962-
int u_bit = get_local_bit(theta_local, u);
988+
const bool u_bit = get_local_bit(theta_local, u);
963989
for (uint v = u + 1; v < n; v++) {
964990
const bool v_bit = get_local_bit(theta_local, v);
965991
const real1 val = G_m[u_offset + v];
@@ -977,7 +1003,7 @@ __kernel void gray(
9771003
real1 energy = ZERO_R1;
9781004
for (uint u = 0; u < n; u++) {
9791005
const size_t u_offset = u * n;
980-
int u_bit = get_local_bit(theta_local, u);
1006+
const bool u_bit = get_local_bit(theta_local, u);
9811007
for (uint v = u + 1; v < n; v++) {
9821008
const bool v_bit = get_local_bit(theta_local, v);
9831009
const real1 val = G_m[u_offset + v];
@@ -1041,7 +1067,7 @@ __kernel void gray_segmented(
10411067
real1 best_energy = ZERO_R1;
10421068
for (uint u = 0; u < n; u++) {
10431069
const size_t u_offset = u * n;
1044-
int u_bit = get_local_bit(theta_local, u);
1070+
const bool u_bit = get_local_bit(theta_local, u);
10451071
for (uint v = u + 1; v < n; v++) {
10461072
const bool v_bit = get_local_bit(theta_local, v);
10471073
const real1 val = get_G_m(G_m, u_offset + v, segment_size);
@@ -1059,7 +1085,7 @@ __kernel void gray_segmented(
10591085
real1 energy = ZERO_R1;
10601086
for (uint u = 0; u < n; u++) {
10611087
const size_t u_offset = u * n;
1062-
int u_bit = get_local_bit(theta_local, u);
1088+
const bool u_bit = get_local_bit(theta_local, u);
10631089
for (uint v = u + 1; v < n; v++) {
10641090
const bool v_bit = get_local_bit(theta_local, v);
10651091
const real1 val = get_G_m(G_m, u_offset + v, segment_size);
@@ -1118,7 +1144,7 @@ __kernel void gray_sparse(
11181144

11191145
real1 best_energy = ZERO_R1;
11201146
for (uint u = 0; u < n; u++) {
1121-
int u_bit = get_local_bit(theta_local, u);
1147+
const bool u_bit = get_local_bit(theta_local, u);
11221148
const size_t mCol = G_rows[u + 1];
11231149
for (int col = G_rows[u]; col < mCol; ++col) {
11241150
const int v = G_cols[col];
@@ -1137,7 +1163,7 @@ __kernel void gray_sparse(
11371163
const size_t flip_bit = gray_code_next(theta_local, i, block << 6U);
11381164
real1 energy = ZERO_R1;
11391165
for (uint u = 0; u < n; u++) {
1140-
int u_bit = get_local_bit(theta_local, u);
1166+
const bool u_bit = get_local_bit(theta_local, u);
11411167
const size_t mCol = G_rows[u + 1];
11421168
for (int col = G_rows[u]; col < mCol; ++col) {
11431169
const int v = G_cols[col];
@@ -1205,7 +1231,7 @@ __kernel void gray_sparse_segmented(
12051231
real1 best_energy = ZERO_R1;
12061232
for (uint u = 0; u < n; u++) {
12071233
const size_t u_offset = u * n;
1208-
int u_bit = get_local_bit(theta_local, u);
1234+
const bool u_bit = get_local_bit(theta_local, u);
12091235
const uint row_end = G_rows[u + 1];
12101236
for (uint col = G_rows[u]; col < row_end; ++col) {
12111237
const int v = G_cols[col];
@@ -1225,7 +1251,7 @@ __kernel void gray_sparse_segmented(
12251251
real1 energy = ZERO_R1;
12261252
for (uint u = 0; u < n; u++) {
12271253
const size_t u_offset = u * n;
1228-
int u_bit = get_local_bit(theta_local, u);
1254+
const bool u_bit = get_local_bit(theta_local, u);
12291255
const uint row_end = G_rows[u + 1];
12301256
for (uint col = G_rows[u]; col < row_end; ++col) {
12311257
const int v = G_cols[col];

0 commit comments

Comments
 (0)