@@ -80,8 +80,40 @@ Base.length(range::NDRange) = length(blocks(range))
8080 CartesianIndex (nI)
8181end
8282
83+
"""
    assume(cond::Bool)

Assume that the condition `cond` is true. This is a hint to the compiler, possibly enabling
it to optimize more aggressively.

Returns `nothing`. The hint is emitted through the `llvm.assume` intrinsic, so `cond` must
really hold whenever this call is reached: LLVM treats a violated assumption as undefined
behavior rather than as a runtime error.
"""
@inline assume(cond::Bool) = Base.llvmcall(("""
        declare void @llvm.assume(i1)

        define void @entry(i8) #0 {
            %cond = icmp eq i8 %0, 1
            call void @llvm.assume(i1 %cond)
            ret void
        }

        attributes #0 = { alwaysinline }""", "entry"),  # must name the `@entry` function above
    Nothing, Tuple{Bool}, cond)
101+
# Tell the compiler, via `assume`, that the `stop` of every per-dimension range
# stored in `CI.indices` is strictly positive. Returns one `assume` result per
# dimension, collected in a tuple.
@inline function assume_nonzero(CI::CartesianIndices)
    return map(CI.indices) do axis
        @inline
        assume(axis.stop > 0)
    end
end
109+
# Translate a linear group index and a linear work-item index into the elements
# of `blocks(ndrange)` and `workitems(ndrange)` they select, then forward to the
# `expand` method that takes those elements.
Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
    # This causes an exception branch and a div.
    # NOTE(review): the assumptions below are presumably meant to let the compiler
    # remove that branch — confirm against the generated code.
    B = blocks(ndrange)
    W = workitems(ndrange)
    assume_nonzero(B)
    assume_nonzero(W)
    # Index the same `W` the assumption was stated on instead of calling
    # `workitems(ndrange)` a second time.
    return expand(ndrange, B[groupidx], W[idx])
end
86118
87119Base. @propagate_inbounds function expand (ndrange:: NDRange{N} , groupidx:: CartesianIndex{N} , idx:: Integer ) where {N}
0 commit comments