@@ -81,8 +81,40 @@ Base.length(range::NDRange) = length(blocks(range))
8181 CartesianIndex (nI)
8282end
8383
84+
"""
    assume(cond::Bool)

Assume that the condition `cond` is true. This is a hint to the compiler, possibly enabling
it to optimize more aggressively.

The hint is lowered to the `llvm.assume` intrinsic and returns `nothing`. Per the LLVM
semantics of that intrinsic, calling `assume` with a condition that is actually false is
undefined behaviour, so only state facts that are guaranteed to hold.
"""
@inline assume(cond::Bool) = Base.llvmcall(("""
        declare void @llvm.assume(i1)

        define void @entry(i8) #0 {
            %cond = icmp eq i8 %0, 1
            call void @llvm.assume(i1 %cond)
            ret void
        }

        attributes #0 = { alwaysinline }""", "entry"),
    Nothing, Tuple{Bool}, cond)
102+
"""
    assume_nonzero(CI::CartesianIndices)

Tell the compiler, via [`assume`](@ref), that the `stop` of every per-dimension index range
of `CI` is strictly positive.

Note that the emitted condition is `stop > 0` for each dimension. Because `assume` states
the condition as a fact, `CI` must really satisfy it whenever this function is called.
"""
@inline function assume_nonzero(CI::CartesianIndices)
    # `ntuple` over `Val(ndims(CI))` visits each dimension with the dimension count lifted
    # to the type domain; the closure is force-inlined so only the `assume` calls remain.
    ntuple(Val(ndims(CI))) do I
        Base.@_inline_meta
        indices = CI.indices[I]
        assume(indices.stop > 0)
    end
end
110+
Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
    # Original note: the linear indexing below causes an exception branch and a div.
    # Stating that the extents of both index spaces are positive is a hint intended to
    # let the compiler optimize that away.
    B = blocks(ndrange)
    W = workitems(ndrange)
    assume_nonzero(B)
    assume_nonzero(W)
    # Index the very objects the assumptions were stated on, then defer to the
    # Cartesian-index method of `expand`.
    expand(ndrange, B[groupidx], W[idx])
end
87119
88120Base. @propagate_inbounds function expand (ndrange:: NDRange{N} , groupidx:: CartesianIndex{N} , idx:: Integer ) where {N}
0 commit comments