@@ -504,26 +504,24 @@ __kernel void calculate_cut_segmented(
504504}
505505
506506real1 single_bit_flip_worker_segmented (__constant uint * theta , __global const real1 * * G_m , const int n , const int segment_size , const bool is_spin_glass , const int k ) {
507+ const size_t k_offset = k * (size_t )n ;
508+ const bool k_bit = !get_const_bit (theta , k );
507509 real1 energy = ZERO_R1 ;
508- const size_t n_st = (size_t )n ;
509- for (int u = 0 ; u < n ; ++ u ) {
510- const size_t u_offset = u * n_st ;
511- bool u_bit = get_const_bit (theta , u );
512- if (u == k ) {
513- u_bit = !u_bit ;
510+ for (int v = 0 ; v < k ; ++ v ) {
511+ const bool v_bit = get_const_bit (theta , v );
512+ real1 val = get_G_m (G_m , k_offset + v , segment_size );
513+ if (is_spin_glass ) {
514+ val *= 2 ;
514515 }
515- for (int v = u + 1 ; v < n ; ++ v ) {
516- const real1 val = get_G_m (G_m , u_offset + v , segment_size );
517- bool v_bit = get_const_bit (theta , v );
518- if (v == k ) {
519- v_bit = ! v_bit ;
520- }
521- if (u_bit != v_bit ) {
522- energy += val ;
523- } else if (is_spin_glass ) {
524- energy -= val ;
525- }
516+ energy += (k_bit != v_bit ) ? val : - val ;
517+ }
518+ for (int v = k + 1 ; v < n ; ++ v ) {
519+ const bool v_bit = get_const_bit (theta , v );
520+ real1 val = get_G_m (G_m , k_offset + v , segment_size );
521+ if (is_spin_glass ) {
522+ val *= 2 ;
526523 }
524+ energy += (k_bit != v_bit ) ? val : - val ;
527525 }
528526
529527 return energy ;
@@ -564,27 +562,55 @@ __kernel void single_bit_flips_segmented(
564562 reduce_energy_index (best_energy , best_i , loc_energy , loc_index , max_energy_ptr , max_index_ptr );
565563}
566564
567- real1 double_bit_flip_worker_segmented (__constant uint * theta , __global const real1 * * G_m , const int n , const int segment_size , const bool is_spin_glass , const int k , const int l ) {
565+ real1 double_bit_flip_worker_segmented (__constant uint * theta , __global const real1 * * G_m , const int n , const int segment_size , const bool is_spin_glass , int k , int l ) {
566+ if (l < k ) {
567+ int t = k ;
568+ k = l ;
569+ l = t ;
570+ }
571+ const size_t k_offset = k * (size_t )n ;
572+ const bool k_bit = !get_const_bit (theta , k );
573+ const size_t l_offset = l * (size_t )n ;
574+ const bool l_bit = !get_const_bit (theta , l );
568575 real1 energy = ZERO_R1 ;
569- const size_t n_st = (size_t )n ;
570- for (int u = 0 ; u < n ; ++ u ) {
571- const size_t u_offset = u * n_st ;
572- bool u_bit = get_const_bit (theta , u );
573- if ((u == k ) || (u == l )) {
574- u_bit = !u_bit ;
576+ for (int v = 0 ; v < k ; ++ v ) {
577+ const bool v_bit = get_const_bit (theta , v );
578+ real1 val = get_G_m (G_m , k_offset + v , segment_size );
579+ if (is_spin_glass ) {
580+ val *= 2 ;
575581 }
576- for (int v = u + 1 ; v < n ; ++ v ) {
577- const real1 val = get_G_m (G_m , u_offset + v , segment_size );
578- bool v_bit = get_const_bit (theta , v );
579- if ((v == k ) || (v == l )) {
580- v_bit = ! v_bit ;
581- }
582- if (u_bit != v_bit ) {
583- energy += val ;
584- } else if (is_spin_glass ) {
585- energy -= val ;
586- }
582+ energy += (k_bit != v_bit ) ? val : - val ;
583+ val = get_G_m (G_m , l_offset + v , segment_size );
584+ if (is_spin_glass ) {
585+ val *= 2 ;
586+ }
587+ energy += (l_bit != v_bit ) ? val : - val ;
588+ }
589+ for (int v = k + 1 ; v < l ; ++ v ) {
590+ const bool v_bit = get_const_bit (theta , v );
591+ real1 val = get_G_m (G_m , k_offset + v , segment_size );
592+ if (is_spin_glass ) {
593+ val *= 2 ;
587594 }
595+ energy += (k_bit != v_bit ) ? val : - val ;
596+ val = get_G_m (G_m , l_offset + v , segment_size );
597+ if (is_spin_glass ) {
598+ val *= 2 ;
599+ }
600+ energy += (l_bit != v_bit ) ? val : - val ;
601+ }
602+ for (int v = l + 1 ; v < n ; ++ v ) {
603+ const bool v_bit = get_const_bit (theta , v );
604+ real1 val = get_G_m (G_m , k_offset + v , segment_size );
605+ if (is_spin_glass ) {
606+ val *= 2 ;
607+ }
608+ energy += (k_bit != v_bit ) ? val : - val ;
609+ val = get_G_m (G_m , l_offset + v , segment_size );
610+ if (is_spin_glass ) {
611+ val *= 2 ;
612+ }
613+ energy += (l_bit != v_bit ) ? val : - val ;
588614 }
589615
590616 return energy ;
@@ -959,7 +985,7 @@ __kernel void gray(
959985 real1 best_energy = ZERO_R1 ;
960986 for (uint u = 0 ; u < n ; u ++ ) {
961987 const size_t u_offset = u * n ;
962- int u_bit = get_local_bit (theta_local , u );
988+ const bool u_bit = get_local_bit (theta_local , u );
963989 for (uint v = u + 1 ; v < n ; v ++ ) {
964990 const bool v_bit = get_local_bit (theta_local , v );
965991 const real1 val = G_m [u_offset + v ];
@@ -977,7 +1003,7 @@ __kernel void gray(
9771003 real1 energy = ZERO_R1 ;
9781004 for (uint u = 0 ; u < n ; u ++ ) {
9791005 const size_t u_offset = u * n ;
980- int u_bit = get_local_bit (theta_local , u );
1006+ const bool u_bit = get_local_bit (theta_local , u );
9811007 for (uint v = u + 1 ; v < n ; v ++ ) {
9821008 const bool v_bit = get_local_bit (theta_local , v );
9831009 const real1 val = G_m [u_offset + v ];
@@ -1041,7 +1067,7 @@ __kernel void gray_segmented(
10411067 real1 best_energy = ZERO_R1 ;
10421068 for (uint u = 0 ; u < n ; u ++ ) {
10431069 const size_t u_offset = u * n ;
1044- int u_bit = get_local_bit (theta_local , u );
1070+ const bool u_bit = get_local_bit (theta_local , u );
10451071 for (uint v = u + 1 ; v < n ; v ++ ) {
10461072 const bool v_bit = get_local_bit (theta_local , v );
10471073 const real1 val = get_G_m (G_m , u_offset + v , segment_size );
@@ -1059,7 +1085,7 @@ __kernel void gray_segmented(
10591085 real1 energy = ZERO_R1 ;
10601086 for (uint u = 0 ; u < n ; u ++ ) {
10611087 const size_t u_offset = u * n ;
1062- int u_bit = get_local_bit (theta_local , u );
1088+ const bool u_bit = get_local_bit (theta_local , u );
10631089 for (uint v = u + 1 ; v < n ; v ++ ) {
10641090 const bool v_bit = get_local_bit (theta_local , v );
10651091 const real1 val = get_G_m (G_m , u_offset + v , segment_size );
@@ -1118,7 +1144,7 @@ __kernel void gray_sparse(
11181144
11191145 real1 best_energy = ZERO_R1 ;
11201146 for (uint u = 0 ; u < n ; u ++ ) {
1121- int u_bit = get_local_bit (theta_local , u );
1147+ const bool u_bit = get_local_bit (theta_local , u );
11221148 const size_t mCol = G_rows [u + 1 ];
11231149 for (int col = G_rows [u ]; col < mCol ; ++ col ) {
11241150 const int v = G_cols [col ];
@@ -1137,7 +1163,7 @@ __kernel void gray_sparse(
11371163 const size_t flip_bit = gray_code_next (theta_local , i , block << 6U );
11381164 real1 energy = ZERO_R1 ;
11391165 for (uint u = 0 ; u < n ; u ++ ) {
1140- int u_bit = get_local_bit (theta_local , u );
1166+ const bool u_bit = get_local_bit (theta_local , u );
11411167 const size_t mCol = G_rows [u + 1 ];
11421168 for (int col = G_rows [u ]; col < mCol ; ++ col ) {
11431169 const int v = G_cols [col ];
@@ -1205,7 +1231,7 @@ __kernel void gray_sparse_segmented(
12051231 real1 best_energy = ZERO_R1 ;
12061232 for (uint u = 0 ; u < n ; u ++ ) {
12071233 const size_t u_offset = u * n ;
1208- int u_bit = get_local_bit (theta_local , u );
1234+ const bool u_bit = get_local_bit (theta_local , u );
12091235 const uint row_end = G_rows [u + 1 ];
12101236 for (uint col = G_rows [u ]; col < row_end ; ++ col ) {
12111237 const int v = G_cols [col ];
@@ -1225,7 +1251,7 @@ __kernel void gray_sparse_segmented(
12251251 real1 energy = ZERO_R1 ;
12261252 for (uint u = 0 ; u < n ; u ++ ) {
12271253 const size_t u_offset = u * n ;
1228- int u_bit = get_local_bit (theta_local , u );
1254+ const bool u_bit = get_local_bit (theta_local , u );
12291255 const uint row_end = G_rows [u + 1 ];
12301256 for (uint col = G_rows [u ]; col < row_end ; ++ col ) {
12311257 const int v = G_cols [col ];
0 commit comments