|
29 | 29 | #include "gc/shenandoah/shenandoahMarkBitMap.hpp" |
30 | 30 |
|
31 | 31 | #include "runtime/atomicAccess.hpp" |
| 32 | +#include "utilities/count_leading_zeros.hpp" |
32 | 33 | #include "utilities/count_trailing_zeros.hpp" |
33 | 34 |
|
34 | 35 | inline size_t ShenandoahMarkBitMap::address_to_index(const HeapWord* addr) const { |
@@ -169,10 +170,99 @@ inline ShenandoahMarkBitMap::idx_t ShenandoahMarkBitMap::get_next_bit_impl(idx_t |
169 | 170 | return r_index; |
170 | 171 | } |
171 | 172 |
|
| 173 | +template<ShenandoahMarkBitMap::bm_word_t flip, bool aligned_left> |
| 174 | +inline ShenandoahMarkBitMap::idx_t ShenandoahMarkBitMap::get_prev_bit_impl(idx_t l_index, idx_t r_index) const { |
| 175 | + STATIC_ASSERT(flip == find_ones_flip || flip == find_zeros_flip); |
| 176 | + verify_range(l_index, r_index); |
| 177 | + assert(!aligned_left || is_aligned(l_index, BitsPerWord), "l_index not aligned"); |
| 178 | + |
| 179 | + // The first word often contains an interesting bit, either due to |
| 180 | + // density or because of features of the calling algorithm. So it's |
| 181 | + // important to examine that first word with a minimum of fuss, |
| 182 | + // minimizing setup time for later words that will be wasted if the |
| 183 | + // first word is indeed interesting. |
| 184 | + |
| 185 | + // The benefit from aligned_left being true is relatively small. |
| 186 | + // It saves an operation in the setup for the word search loop. |
| 187 | + // It also eliminates the range check on the final result. |
| 188 | + // However, callers often have a comparison with l_index, and |
| 189 | + // inlining often allows the two comparisons to be combined; it is |
| 190 | + // important when !aligned_left that return paths either return |
| 191 | + // l_index or a value dominating a comparison with l_index. |
| 192 | + // aligned_left is still helpful when the caller doesn't have a |
| 193 | + // range check because features of the calling algorithm guarantee |
| 194 | + // an interesting bit will be present. |
| 195 | + |
| 196 | + if (l_index < r_index) { |
| 197 | + // Get the word containing r_index, and shift out the high-order bits (representing objects that come after r_index) |
| 198 | + idx_t index = to_words_align_down(r_index); |
| 199 | + assert(BitsPerWord - 2 >= bit_in_word(r_index), "sanity"); |
| 200 | + size_t shift = BitsPerWord - 2 - bit_in_word(r_index); |
| 201 | + bm_word_t cword = (map(index) ^ flip) << shift; |
| 202 | + // After this shift, the highest order bits correspond to r_index. |
| 203 | + |
| 204 | + // We give special handling if either of the two most significant bits (Weak or Strong) is set. With 64-bit |
| 205 | + // words, the mask of interest is 0xc000_0000_0000_0000. Symbolically, this constant is represented by: |
| 206 | + const bm_word_t first_object_mask = ((bm_word_t) 0x3) << (BitsPerWord - 2); |
| 207 | + if ((cword & first_object_mask) != 0) { |
| 208 | + // The first object is similarly often interesting. When it matters |
| 209 | + // (density or features of the calling algorithm make it likely |
| 210 | + // the first bit is set), going straight to the next clause compares |
| 211 | + // poorly with doing this check first; count_leading_zeros can be |
| 212 | + // relatively expensive, plus there is the additional range check. |
| 213 | + // But when the first bit isn't set, the cost of having tested for |
| 214 | + // it is relatively small compared to the rest of the search. |
| 215 | + return r_index; |
| 216 | + } else if (cword != 0) { |
| 217 | + // Note that there are 2 bits corresponding to every index value (Weak and Strong), and every odd index value |
| 218 | + // corresponds to the same object as index-1 |
| 219 | + // Flipped and shifted first word is non-zero. If leading_zeros is 0 or 1, we return r_index (above). |
| 220 | + // if leading zeros is 2 or 3, we return (r_index - 1) or (r_index - 2), and so forth |
| 221 | + idx_t result = r_index + 1 - count_leading_zeros(cword); |
| 222 | + if (aligned_left || (result >= l_index)) return result; |
| 223 | + else { |
| 224 | + // Sentinel value means no object found within specified range. |
| 225 | + return r_index + 2; |
| 226 | + } |
| 227 | + } else { |
| 228 | + // Flipped and shifted first word is zero. Word search through |
| 229 | + // aligned up r_index for a non-zero flipped word. |
| 230 | + idx_t limit = aligned_left |
| 231 | + ? to_words_align_down(l_index) // Minuscule savings when aligned. |
| 232 | + : to_words_align_up(l_index); |
| 233 | + // Unsigned index is always >= unsigned limit if limit equals zero, so test for strictly greater than before decrement. |
| 234 | + while (index-- > limit) { |
| 235 | + cword = map(index) ^ flip; |
| 236 | + if (cword != 0) { |
| 237 | + // cword hods bits: |
| 238 | + // 0x03 for the object corresponding to index (and index+1) (count_leading_zeros is 62 or 63) |
| 239 | + // 0x0c for the object corresponding to index + 2 (and index+3) (count_leading_zeros is 60 or 61) |
| 240 | + // and so on. |
| 241 | + idx_t result = bit_index(index + 1) - (count_leading_zeros(cword) + 1); |
| 242 | + if (aligned_left || (result >= l_index)) return result; |
| 243 | + else { |
| 244 | + // Sentinel value means no object found within specified range. |
| 245 | + return r_index + 2; |
| 246 | + } |
| 247 | + } |
| 248 | + } |
| 249 | + // No bits in range; return r_index+2. |
| 250 | + return r_index + 2; |
| 251 | + } |
| 252 | + } |
| 253 | + else { |
| 254 | + return r_index + 2; |
| 255 | + } |
| 256 | +} |
| 257 | + |
172 | 258 | inline ShenandoahMarkBitMap::idx_t ShenandoahMarkBitMap::get_next_one_offset(idx_t l_offset, idx_t r_offset) const { |
173 | 259 | return get_next_bit_impl<find_ones_flip, false>(l_offset, r_offset); |
174 | 260 | } |
175 | 261 |
|
| 262 | +inline ShenandoahMarkBitMap::idx_t ShenandoahMarkBitMap::get_prev_one_offset(idx_t l_offset, idx_t r_offset) const { |
| 263 | + return get_prev_bit_impl<find_ones_flip, false>(l_offset, r_offset); |
| 264 | +} |
| 265 | + |
176 | 266 | // Returns a bit mask for a range of bits [beg, end) within a single word. Each |
177 | 267 | // bit in the mask is 0 if the bit is in the range, 1 if not in the range. The |
178 | 268 | // returned mask can be used directly to clear the range, or inverted to set the |
|
0 commit comments