Skip to content

Commit fa5cd27

Browse files
jinhuang1102Jin Huang
andauthored
[profcheck] Add unknown branch weights to expand LL/SC loop. (#166273)
As a follow-up to PR #165841, this change addresses `prof_md` metadata loss in AtomicExpandPass when lowering `atomicrmw xchg` to a Load-Linked/Store-Conditional (LL/SC) loop. This path is distinct from the LSE path addressed previously: PR #165841 (and its tests) used `-mtriple=aarch64-linux-gnu`, which targets a modern **ARMv8.1+** architecture. This architecture supports **Large System Extensions (LSE)**, allowing `atomicrmw` to be lowered directly to a more efficient hardware instruction. This PR (and its tests) uses `-mtriple=aarch64--` or `-mtriple=armv8-linux-gnueabihf`, which indicates an `ARMv8.0 or lower architecture that does not support LSE`. On these targets, the pass must fall back to synthesizing a manual LL/SC loop using the `ldaxr`/`stxr` instruction pair. Similar to the previous issue, the new conditional branch was failing to inherit the `prof_md` metadata. This PR correctly applies branch weights to the newly created branch within the LL/SC loop, ensuring profile information is preserved. Co-authored-by: Jin Huang <[email protected]>
1 parent 6d4e75c commit fa5cd27

File tree

2 files changed

+30
-10
lines changed

2 files changed

+30
-10
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "llvm/IR/MDBuilder.h"
3939
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
4040
#include "llvm/IR/Module.h"
41+
#include "llvm/IR/ProfDataUtils.h"
4142
#include "llvm/IR/Type.h"
4243
#include "llvm/IR/User.h"
4344
#include "llvm/IR/Value.h"
@@ -1259,8 +1260,7 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop(
12591260
BasicBlock *BB = Builder.GetInsertBlock();
12601261
Function *F = BB->getParent();
12611262

1262-
assert(AddrAlign >=
1263-
F->getDataLayout().getTypeStoreSize(ResultTy) &&
1263+
assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
12641264
"Expected at least natural alignment at this point.");
12651265

12661266
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
@@ -1295,7 +1295,13 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop(
12951295
TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
12961296
Value *TryAgain = Builder.CreateICmpNE(
12971297
StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1298-
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1298+
1299+
Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1300+
1301+
// Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
1302+
// hard to predict precise branch weights we mark the branch as "unknown"
1303+
// (50/50) to prevent misleading optimizations.
1304+
setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE);
12991305

13001306
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
13011307
return Loaded;

llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
22
; RUN: opt -codegen-opt-level=1 -S -mtriple=aarch64-- -passes=atomic-expand %s | FileCheck %s
33
; RUN: opt -codegen-opt-level=1 -S -mtriple=aarch64-- -mattr=+outline-atomics -passes=atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
44

5-
define void @atomic_swap_f16(ptr %ptr, half %val) nounwind {
5+
define void @atomic_swap_f16(ptr %ptr, half %val) !prof !0 {
66
; CHECK-LABEL: @atomic_swap_f16(
77
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half [[VAL:%.*]] to i16
88
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -12,7 +12,7 @@ define void @atomic_swap_f16(ptr %ptr, half %val) nounwind {
1212
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP1]] to i64
1313
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP4]], ptr elementtype(i16) [[PTR]])
1414
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
15-
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
15+
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]], !prof [[PROF1:![0-9]+]]
1616
; CHECK: atomicrmw.end:
1717
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to half
1818
; CHECK-NEXT: ret void
@@ -27,7 +27,7 @@ define void @atomic_swap_f16(ptr %ptr, half %val) nounwind {
2727
ret void
2828
}
2929

30-
define void @atomic_swap_f32(ptr %ptr, float %val) nounwind {
30+
define void @atomic_swap_f32(ptr %ptr, float %val) nounwind !prof !0 {
3131
; CHECK-LABEL: @atomic_swap_f32(
3232
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[VAL:%.*]] to i32
3333
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
@@ -37,7 +37,7 @@ define void @atomic_swap_f32(ptr %ptr, float %val) nounwind {
3737
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64
3838
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP4]], ptr elementtype(i32) [[PTR]])
3939
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP5]], 0
40-
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
40+
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]], !prof [[PROF1]]
4141
; CHECK: atomicrmw.end:
4242
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP3]] to float
4343
; CHECK-NEXT: ret void
@@ -52,15 +52,15 @@ define void @atomic_swap_f32(ptr %ptr, float %val) nounwind {
5252
ret void
5353
}
5454

55-
define void @atomic_swap_f64(ptr %ptr, double %val) nounwind {
55+
define void @atomic_swap_f64(ptr %ptr, double %val) nounwind !prof !0 {
5656
; CHECK-LABEL: @atomic_swap_f64(
5757
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[VAL:%.*]] to i64
5858
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
5959
; CHECK: atomicrmw.start:
6060
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i64) [[PTR:%.*]])
6161
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP1]], ptr elementtype(i64) [[PTR]])
6262
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP3]], 0
63-
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
63+
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]], !prof [[PROF1]]
6464
; CHECK: atomicrmw.end:
6565
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP2]] to double
6666
; CHECK-NEXT: ret void
@@ -74,3 +74,17 @@ define void @atomic_swap_f64(ptr %ptr, double %val) nounwind {
7474
%t1 = atomicrmw xchg ptr %ptr, double %val acquire
7575
ret void
7676
}
77+
78+
!0 = !{!"function_entry_count", i64 1000}
79+
;.
80+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
81+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nounwind willreturn }
82+
;.
83+
; OUTLINE-ATOMICS: attributes #[[ATTR0:[0-9]+]] = { "target-features"="+outline-atomics" }
84+
; OUTLINE-ATOMICS: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-features"="+outline-atomics" }
85+
;.
86+
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
87+
; CHECK: [[PROF1]] = !{!"unknown", !"atomic-expand"}
88+
;.
89+
; OUTLINE-ATOMICS: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
90+
;.

0 commit comments

Comments
 (0)