@@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+    llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+    return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+    return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+    return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+    return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+    return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+    return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+    return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+    return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+    return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: {
+    Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID);
+    llvm::Value *Value = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Strategy = EmitScalarExpr(E->getArg(1));
+    llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
+    return Builder.CreateCall(F, {Value, Strategy});
+  }
   case AMDGPU::BI__builtin_amdgcn_div_scale:
   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
     // Translate from the intrinsics's struct return to the builtin's out
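
For context, a minimal usage sketch of the new builtins follows. It is not part of the commit: the kernel, the lane check, and the reading of the second argument as a reduction-strategy selector (with 0 assumed to be the default) are illustrative assumptions; all the codegen above guarantees is that both operands are forwarded to the matching llvm.amdgcn.wave.reduce.* intrinsic.

#include <hip/hip_runtime.h>

// Hypothetical HIP kernel, compiled for an AMDGPU target where the new
// __builtin_amdgcn_wave_reduce_* builtins are available.
__global__ void wave_sum(const unsigned *in, unsigned *out) {
  unsigned v = in[threadIdx.x];
  // Lowered by EmitAMDGPUBuiltinExpr above to llvm.amdgcn.wave.reduce.add;
  // the second operand is passed through as the strategy (0 assumed default).
  unsigned sum = __builtin_amdgcn_wave_reduce_add_u32(v, 0);
  if (threadIdx.x % warpSize == 0)          // assuming the result is uniform
    out[threadIdx.x / warpSize] = sum;      // across each wavefront
}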