@@ -343,18 +343,37 @@ function update_grid!(neighborhood_search::GridNeighborhoodSearch{<:Any, Paralle
343343    return  neighborhood_search
344344end 
345345
346- @inline  function  foreach_neighbor (f, system_coords, neighbor_system_coords,
347-                                   neighborhood_search:: GridNeighborhoodSearch , point;
348-                                   search_radius =  search_radius (neighborhood_search))
346+ @propagate_inbounds  function  foreach_neighbor (f, system_coords, neighbor_system_coords,
347+                                               neighborhood_search:: GridNeighborhoodSearch ,
348+                                               point;
349+                                               search_radius =  search_radius (neighborhood_search))
350+     #  Due to https://github.com/JuliaLang/julia/issues/30411, we cannot just remove
351+     #  a `@boundscheck` by calling this function with `@inbounds` because it has a kwarg.
352+     #  We have to use `@propagate_inbounds`, which would also remove the bounds checks
353+     #  in the neighbor loop, which is not safe (see the comment in `__foreach_neighbor`).
354+     #  To avoid this, the loop lives behind a function barrier that stops the `@inbounds`.
355+     point_coords =  extract_svector (system_coords, Val (ndims (neighborhood_search)), point)
356+ 
357+     __foreach_neighbor (f, system_coords, neighbor_system_coords, neighborhood_search,
358+                        point, point_coords, search_radius)
359+ end 
360+ 
361+ @inline  function  __foreach_neighbor (f, system_coords, neighbor_system_coords,
362+                                     neighborhood_search:: GridNeighborhoodSearch ,
363+                                     point, point_coords, search_radius)
349364    (; periodic_box) =  neighborhood_search
350365
351-     point_coords =  extract_svector (system_coords, Val (ndims (neighborhood_search)), point)
352366    cell =  cell_coords (point_coords, neighborhood_search)
353367
354368    for  neighbor_cell_ in  neighboring_cells (cell, neighborhood_search)
355369        neighbor_cell =  Tuple (neighbor_cell_)
370+         neighbors =  points_in_cell (neighbor_cell, neighborhood_search)
371+ 
372+         for  neighbor_ in  eachindex (neighbors)
373+             neighbor =  @inbounds  neighbors[neighbor_]
356374
357-         for  neighbor in  points_in_cell (neighbor_cell, neighborhood_search)
375+             #  Making the following `@inbounds` yields a ~2% speedup on an NVIDIA H100.
376+             #  But we don't know if `neighbor` (extracted from the cell list) is in bounds.
358377            neighbor_coords =  extract_svector (neighbor_system_coords,
359378                                              Val (ndims (neighborhood_search)), neighbor)
360379
392411                      for  cell in  neighboring_cells (cell, neighborhood_search))
393412end 
394413
395- @inline   function  points_in_cell (cell_index, neighborhood_search)
414+ @propagate_inbounds   function  points_in_cell (cell_index, neighborhood_search)
396415    (; cell_list) =  neighborhood_search
397416
398417    return  cell_list[periodic_cell_index (cell_index, neighborhood_search)]
0 commit comments