Skip to content

Commit 02ca50e

Browse files
jinhuang1102Jin Huang
andauthored
[profcheck] Fix missing profile metadata in ExpandMemCmp (#169979)
This patch fixes missing profile metadata in the `ExpandMemCmp` pass when it expands `memcmp` calls. Without it, the branches between the newly created blocks would carry no profile data, potentially leading to suboptimal code generation. The patch updates the `ExpandMemCmp` pass to set branch weights to a default `unknown` (50/50 weights) value when a profile is available. This prevents the expansion from turning previously profiled code into unprofiled code. The patch also updates the tests to reflect the new branch weights. Co-authored-by: Jin Huang <[email protected]>
1 parent c8fc766 commit 02ca50e

File tree

5 files changed

+54
-20
lines changed

5 files changed

+54
-20
lines changed

llvm/lib/CodeGen/ExpandMemCmp.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/IR/Dominators.h"
2626
#include "llvm/IR/IRBuilder.h"
2727
#include "llvm/IR/PatternMatch.h"
28+
#include "llvm/IR/ProfDataUtils.h"
2829
#include "llvm/InitializePasses.h"
2930
#include "llvm/Target/TargetMachine.h"
3031
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -488,6 +489,8 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
488489
// continue to next LoadCmpBlock or EndBlock.
489490
BasicBlock *BB = Builder.GetInsertBlock();
490491
BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
492+
setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
493+
CI->getFunction());
491494
Builder.Insert(CmpBr);
492495
if (DTU)
493496
DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
@@ -552,6 +555,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
552555
// to next LoadCmpBlock or EndBlock.
553556
BasicBlock *BB = Builder.GetInsertBlock();
554557
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
558+
setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
559+
CI->getFunction());
555560
Builder.Insert(CmpBr);
556561
if (DTU)
557562
DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
@@ -592,6 +597,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
592597
Value *Res =
593598
Builder.CreateSelect(Cmp, Constant::getAllOnesValue(Builder.getInt32Ty()),
594599
ConstantInt::get(Builder.getInt32Ty(), 1));
600+
setExplicitlyUnknownBranchWeightsIfProfiled(*cast<Instruction>(Res),
601+
DEBUG_TYPE, CI->getFunction());
595602

596603
PhiRes->addIncoming(Res, ResBlock.BB);
597604
BranchInst *NewBr = BranchInst::Create(EndBlock);

llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
22
; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
33
; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
44

@@ -98,23 +98,23 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y) {
9898
ret i32 %call
9999
}
100100

101-
define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
101+
define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
102102
; CHECK-LABEL: define i32 @cmp7(
103-
; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) {
103+
; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) !prof [[PROF0:![0-9]+]] {
104104
; CHECK-NEXT: br label [[LOADBB:%.*]]
105105
; CHECK: res_block:
106106
; CHECK-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
107107
; CHECK-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
108108
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
109-
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
109+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
110110
; CHECK-NEXT: br label [[ENDBLOCK:%.*]]
111111
; CHECK: loadbb:
112112
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 1
113113
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[Y]], align 1
114114
; CHECK-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
115115
; CHECK-NEXT: [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
116116
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
117-
; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
117+
; CHECK-NEXT: br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
118118
; CHECK: loadbb1:
119119
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
120120
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
@@ -123,7 +123,7 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) {
123123
; CHECK-NEXT: [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
124124
; CHECK-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
125125
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
126-
; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
126+
; CHECK-NEXT: br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]], !prof [[PROF1]]
127127
; CHECK: endblock:
128128
; CHECK-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
129129
; CHECK-NEXT: ret i32 [[PHI_RES]]
@@ -860,3 +860,11 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
860860
%conv = zext i1 %cmp to i32
861861
ret i32 %conv
862862
}
863+
864+
!0 = !{!"function_entry_count", i64 1000}
865+
;.
866+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
867+
;.
868+
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
869+
; CHECK: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
870+
;.

llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
22
; RUN: opt -S -expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
33
; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
44

@@ -34,20 +34,20 @@ define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonl
3434
ret i32 %call
3535
}
3636

37-
define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
37+
define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
3838
; X32-LABEL: @cmp3(
3939
; X32-NEXT: br label [[LOADBB:%.*]]
4040
; X32: res_block:
4141
; X32-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
42-
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
42+
; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
4343
; X32-NEXT: br label [[ENDBLOCK:%.*]]
4444
; X32: loadbb:
4545
; X32-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
4646
; X32-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1
4747
; X32-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
4848
; X32-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
4949
; X32-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
50-
; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
50+
; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
5151
; X32: loadbb1:
5252
; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
5353
; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
@@ -564,3 +564,10 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
564564
ret i32 %conv
565565
}
566566

567+
!0 = !{!"function_entry_count", i64 1000}
568+
;.
569+
; X32: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
570+
;.
571+
; X32: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
572+
; X32: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
573+
;.

llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
22
; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
33
; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD
44
; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
@@ -36,20 +36,20 @@ define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonl
3636
ret i32 %call
3737
}
3838

39-
define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
39+
define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
4040
; X64-LABEL: @cmp3(
4141
; X64-NEXT: br label [[LOADBB:%.*]]
4242
; X64: res_block:
4343
; X64-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
44-
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
44+
; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
4545
; X64-NEXT: br label [[ENDBLOCK:%.*]]
4646
; X64: loadbb:
4747
; X64-NEXT: [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
4848
; X64-NEXT: [[TMP6:%.*]] = load i16, ptr [[Y:%.*]], align 1
4949
; X64-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
5050
; X64-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
5151
; X64-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
52-
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
52+
; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
5353
; X64: loadbb1:
5454
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
5555
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
@@ -474,7 +474,7 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y) {
474474
ret i32 %conv
475475
}
476476

477-
define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
477+
define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
478478
; X64_1LD-LABEL: @cmp_eq3(
479479
; X64_1LD-NEXT: br label [[LOADBB:%.*]]
480480
; X64_1LD: res_block:
@@ -483,14 +483,14 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) {
483483
; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
484484
; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
485485
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
486-
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
486+
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]], !prof [[PROF1]]
487487
; X64_1LD: loadbb1:
488488
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
489489
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
490490
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
491491
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
492492
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
493-
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
493+
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]], !prof [[PROF1]]
494494
; X64_1LD: endblock:
495495
; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
496496
; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -1076,3 +1076,15 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y) {
10761076
ret i32 %conv
10771077
}
10781078

1079+
!0 = !{!"function_entry_count", i64 1000}
1080+
;.
1081+
; X64_1LD: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
1082+
;.
1083+
; X64_2LD: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
1084+
;.
1085+
; X64_1LD: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
1086+
; X64_1LD: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
1087+
;.
1088+
; X64_2LD: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
1089+
; X64_2LD: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
1090+
;.

llvm/utils/profcheck-xfail.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll
143143
Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll
144144
Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll
145145
Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll
146-
Transforms/ExpandMemCmp/AArch64/memcmp.ll
147-
Transforms/ExpandMemCmp/X86/memcmp.ll
148-
Transforms/ExpandMemCmp/X86/memcmp-x32.ll
146+
Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
147+
Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
148+
Transforms/ExpandVariadics/intrinsics.ll
149149
Transforms/FixIrreducible/basic.ll
150150
Transforms/FixIrreducible/bug45623.ll
151151
Transforms/FixIrreducible/callbr.ll

0 commit comments

Comments
 (0)