[profcheck] Add unknown branch weights to expanded cmpxchg loop. (#165841)

jinhuang1102 · Jin Huang · web-flow · commit efa7ab06ebf7 · 2025-11-05T09:33:09.000-08:00
The AtomicExpandPass is responsible for lowering high-level atomic
operations (like `atomicrmw fadd`) that are unsupported by the target
hardware into a cmpxchg retry loop.

Since precise branch weights for the retry loop's conditional branch
cannot be determined at this point, this change uses the
`setExplicitlyUnknownBranchWeightsIfProfiled` function to explicitly
mark that branch with "unknown" (50/50) branch weights.

This PR includes fixes for the following tests:
```
Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
Transforms/AtomicExpand/AArch64/pcsections.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-system.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-nand.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-agent.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-v2bf16-system.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-agent.ll
Transforms/AtomicExpand/AMDGPU/expand-atomic-v2f16-system.ll
Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll
Transforms/AtomicExpand/ARM/atomicrmw-fp.ll
Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll
Transforms/AtomicExpand/Mips/atomicrmw-fp.ll
Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll
Transforms/AtomicExpand/RISCV/atomicrmw-fp.ll
Transforms/AtomicExpand/SPARC/libcalls.ll
Transforms/AtomicExpand/X86/expand-atomic-rmw-fp.ll
Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
```

Co-authored-by: Jin Huang <jingold@google.com>
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1686,7 +1686,12 @@ Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
 
   Loaded->addIncoming(NewLoaded, LoopBB);
 
-  Builder.CreateCondBr(Success, ExitBB, LoopBB);
+  Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+  // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
+  // cannot be easily determined here, we mark the branch as "unknown" (50/50)
+  // to prevent misleading optimizations.
+  setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
   return NewLoaded;
diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
@@ -1,7 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -mtriple=aarch64-linux-gnu -passes=atomic-expand %s | FileCheck %s
 
-define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
+define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) !prof !0 {
 ; CHECK-LABEL: @test_atomicrmw_fadd_f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
@@ -14,7 +14,7 @@ define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
 ; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
 ; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
 ; CHECK-NEXT:    [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
-; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
+; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    ret float [[TMP5]]
 ;
@@ -336,3 +336,11 @@ define <2 x half> @atomicrmw_fminimum_2_x_half(ptr %ptr, <2 x half> %val) {
   %res = atomicrmw fminimum ptr %ptr, <2 x half> %val seq_cst
   ret <2 x half> %res
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"unknown", !"atomic-expand"}
+;.