@@ -134,6 +134,13 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
134134 entry:: LLVM.Function )
135135 entry_fn = LLVM. name (entry)
136136
137+ # get rid of unreachable control flow (JuliaLang/Metal.jl#370)
138+ if job. config. target. macos < v " 15"
139+ for f in functions (mod)
140+ replace_unreachable! (job, f)
141+ end
142+ end
143+
137144 # add kernel metadata
138145 if job. config. kernel
139146 entry = add_address_spaces! (job, mod, entry)
@@ -142,6 +149,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
142149
143150 add_module_metadata! (job, mod)
144151
152+ # JuliaLang/Metal.jl#113
145153 hide_noreturn! (mod)
146154 end
147155
@@ -1075,3 +1083,98 @@ function annotate_air_intrinsics!(@nospecialize(job::CompilerJob), mod::LLVM.Mod
10751083
10761084 return changed
10771085end
1086+
# replace unreachable control flow with branches to the exit block
#
# before macOS 15, code generated by Julia 1.11 causes compilation failures in the back-end.
# the reduced example contains unreachable control flow executed divergently, so this is a
# similar issue as encountered with NVIDIA, albeit causing crashes instead of miscompiles.
#
# the proposed solution is to avoid (divergent) unreachable control flow, instead replacing
# it by branches to the exit block. since `unreachable` doesn't lower to anything that
# aborts the kernel anyway (can we fix this?), this transformation should be safe.
#
# arguments:
# - `job`: the compiler job. not used by the body; callers pass it as the first argument.
# - `f`: the function that is rewritten in place.
#
# returns `true` when `f` was modified, and `false` when it contains no `unreachable`
# instruction, or no `ret` instruction that could serve as a branch target.
function replace_unreachable!(@nospecialize(job::CompilerJob), f::LLVM.Function)
    # find unreachable instructions and exit blocks
    unreachables = Instruction[]
    exit_blocks = BasicBlock[]
    for bb in blocks(f), inst in instructions(bb)
        if isa(inst, LLVM.UnreachableInst)
            push!(unreachables, inst)
        end
        if isa(inst, LLVM.RetInst)
            push!(exit_blocks, bb)
        end
    end
    isempty(unreachables) && return false

    # if we don't have an exit block, we can't do much. we could insert a return, but that
    # would probably keep the problematic control flow just as it is.
    isempty(exit_blocks) && return false

    @dispose builder=IRBuilder() begin
        # if we have multiple exit blocks, take the last one, which is hopefully the least
        # divergent (assuming divergent control flow is the root of the problem here).
        exit_block = last(exit_blocks)
        ret = terminator(exit_block)

        # create a return block with only the return instruction, so that we only have to
        # care about any values returned, and not about any other SSA value in the block.
        if first(instructions(exit_block)) == ret
            # we can reuse the exit block if it only contains the return
            return_block = exit_block
        else
            # split the exit block right before the ret
            return_block = BasicBlock(f, "ret")
            move_after(return_block, exit_block)

            # emit a branch
            position!(builder, ret)
            br!(builder, return_block)

            # move the return
            delete!(exit_block, ret)
            position!(builder, return_block)
            insert!(builder, ret)
        end

        # when returning a value, add a phi node to the return block, so that we can later
        # add incoming undef values when branching from `unreachable` blocks
        if !isempty(operands(ret))
            position!(builder, ret)
            # XXX: support aggregate returns?
            val = only(operands(ret))
            phi = phi!(builder, value_type(val))
            # every pre-existing edge into the return block keeps the original value
            for pred in predecessors(return_block)
                push!(incoming(phi), (val, pred))
            end
            operands(ret)[1] = phi
        end

        # replace the unreachable with a branch to the return block
        for unreachable in unreachables
            bb = LLVM.parent(unreachable)

            # remove preceding traps to avoid reconstructing unreachable control flow.
            # the `isa` check also covers the case where `prev` is not a call at all.
            prev = previnst(unreachable)
            if isa(prev, LLVM.CallInst) && name(called_operand(prev)) == "llvm.trap"
                unsafe_delete!(bb, prev)
            end

            # replace the unreachable with a branch to the return block
            position!(builder, unreachable)
            br!(builder, return_block)
            unsafe_delete!(bb, unreachable)

            # patch up any phi nodes in the return block: the new edge coming from `bb`
            # needs an incoming value, and since that path was unreachable, undef will do
            for inst in instructions(return_block)
                if isa(inst, LLVM.PHIInst)
                    undef = UndefValue(value_type(inst))
                    vals = incoming(inst)
                    push!(vals, (undef, bb))
                end
            end
        end
    end

    return true
end
0 commit comments