Skip to content

Commit 77ef070

Browse files
committed
Merge remote-tracking branch 'origin/main' into pr/s32-shift
2 parents 714883d + ff98efa commit 77ef070

File tree

11 files changed

+88
-56
lines changed

11 files changed

+88
-56
lines changed

llvm/include/llvm/Transforms/IPO/SampleProfile.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
4141
SampleProfileLoaderPass(
4242
std::string File = "", std::string RemappingFile = "",
4343
ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None,
44-
IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr);
44+
IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr,
45+
bool DisableSampleProfileInlining = false,
46+
bool UseFlattenedProfile = false);
4547

4648
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
4749

@@ -50,6 +52,8 @@ class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
5052
std::string ProfileRemappingFileName;
5153
const ThinOrFullLTOPhase LTOPhase;
5254
IntrusiveRefCntPtr<vfs::FileSystem> FS;
55+
bool DisableSampleProfileInlining;
56+
bool UseFlattenedProfile;
5357
};
5458

5559
} // end namespace llvm

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,6 +2162,19 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
21622162
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
21632163
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
21642164

2165+
if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2166+
// Explicitly disable sample loader inlining and use flattened profile in O0
2167+
// pipeline.
2168+
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2169+
PGOOpt->ProfileRemappingFile,
2170+
ThinOrFullLTOPhase::None, nullptr,
2171+
/*DisableSampleProfileInlining=*/true,
2172+
/*UseFlattenedProfile=*/true));
2173+
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2174+
// RequireAnalysisPass for PSI before subsequent non-module passes.
2175+
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2176+
}
2177+
21652178
invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
21662179

21672180
// Build a minimal pipeline based on the semantics required by LLVM,

llvm/lib/Target/RISCV/RISCVCombine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ def RISCVO0PreLegalizerCombiner: GICombiner<
2424
def RISCVPostLegalizerCombiner
2525
: GICombiner<"RISCVPostLegalizerCombinerImpl",
2626
[sub_to_add, combines_for_extload, redundant_and,
27-
identity_combines, commute_constant_to_rhs,
27+
identity_combines, shift_immed_chain, commute_constant_to_rhs,
2828
constant_fold_cast_op]> {
2929
}

llvm/lib/Transforms/IPO/SampleProfile.cpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,8 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
469469
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
470470
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
471471
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
472-
LazyCallGraph &CG)
472+
LazyCallGraph &CG, bool DisableSampleProfileInlining,
473+
bool UseFlattenedProfile)
473474
: SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
474475
std::move(FS)),
475476
GetAC(std::move(GetAssumptionCache)),
@@ -478,7 +479,9 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
478479
AnnotatedPassName(AnnotateSampleProfileInlinePhase
479480
? llvm::AnnotateInlinePassName(InlineContext{
480481
LTOPhase, InlinePass::SampleProfileInliner})
481-
: CSINLINE_DEBUG) {}
482+
: CSINLINE_DEBUG),
483+
DisableSampleProfileInlining(DisableSampleProfileInlining),
484+
UseFlattenedProfile(UseFlattenedProfile) {}
482485

483486
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
484487
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -592,6 +595,10 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
592595
// attribute.
593596
bool ProfAccForSymsInList;
594597

598+
bool DisableSampleProfileInlining;
599+
600+
bool UseFlattenedProfile;
601+
595602
// External inline advisor used to replay inline decision from remarks.
596603
std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
597604

@@ -919,7 +926,7 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
919926
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
920927
SmallVector<CallBase *, 8> *InlinedCallSite) {
921928
// Bail out early if sample-loader inliner is disabled.
922-
if (DisableSampleLoaderInlining)
929+
if (DisableSampleProfileInlining)
923930
return false;
924931

925932
// Bail out early if MaxNumPromotions is zero.
@@ -1230,7 +1237,7 @@ bool SampleProfileLoader::tryInlineCandidate(
12301237
InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
12311238
// Do not attempt to inline a candidate if
12321239
// sample-profile inlining is disabled (constructor flag or --disable-sample-loader-inlining).
1233-
if (DisableSampleLoaderInlining)
1240+
if (DisableSampleProfileInlining)
12341241
return false;
12351242

12361243
CallBase &CB = *Candidate.CallInstr;
@@ -1974,6 +1981,13 @@ bool SampleProfileLoader::doInitialization(Module &M,
19741981

19751982
PSL = Reader->getProfileSymbolList();
19761983

1984+
if (DisableSampleLoaderInlining.getNumOccurrences())
1985+
DisableSampleProfileInlining = DisableSampleLoaderInlining;
1986+
1987+
if (UseFlattenedProfile)
1988+
ProfileConverter::flattenProfile(Reader->getProfiles(),
1989+
Reader->profileIsCS());
1990+
19771991
// While profile-sample-accurate is on, ignore symbol list.
19781992
ProfAccForSymsInList =
19791993
ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
@@ -2304,9 +2318,12 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
23042318
}
23052319
SampleProfileLoaderPass::SampleProfileLoaderPass(
23062320
std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
2307-
IntrusiveRefCntPtr<vfs::FileSystem> FS)
2321+
IntrusiveRefCntPtr<vfs::FileSystem> FS, bool DisableSampleProfileInlining,
2322+
bool UseFlattenedProfile)
23082323
: ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2309-
LTOPhase(LTOPhase), FS(std::move(FS)) {}
2324+
LTOPhase(LTOPhase), FS(std::move(FS)),
2325+
DisableSampleProfileInlining(DisableSampleProfileInlining),
2326+
UseFlattenedProfile(UseFlattenedProfile) {}
23102327

23112328
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
23122329
ModuleAnalysisManager &AM) {
@@ -2331,7 +2348,8 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
23312348
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
23322349
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
23332350
: ProfileRemappingFileName,
2334-
LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG);
2351+
LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2352+
DisableSampleProfileInlining, UseFlattenedProfile);
23352353
if (!SampleLoader.doInitialization(M, &FAM))
23362354
return PreservedAnalyses::all();
23372355

llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,11 @@ declare i16 @llvm.abs.i16(i16, i1 immarg)
1313
declare i32 @llvm.abs.i32(i32, i1 immarg)
1414
declare i64 @llvm.abs.i64(i64, i1 immarg)
1515

16-
; FIXME: Could combine back to back srais.
1716
define i8 @abs8(i8 %x) {
1817
; RV32I-LABEL: abs8:
1918
; RV32I: # %bb.0:
2019
; RV32I-NEXT: slli a1, a0, 24
21-
; RV32I-NEXT: srai a1, a1, 24
22-
; RV32I-NEXT: srai a1, a1, 7
20+
; RV32I-NEXT: srai a1, a1, 31
2321
; RV32I-NEXT: add a0, a0, a1
2422
; RV32I-NEXT: xor a0, a0, a1
2523
; RV32I-NEXT: ret
@@ -34,8 +32,7 @@ define i8 @abs8(i8 %x) {
3432
; RV64I-LABEL: abs8:
3533
; RV64I: # %bb.0:
3634
; RV64I-NEXT: slli a1, a0, 56
37-
; RV64I-NEXT: srai a1, a1, 56
38-
; RV64I-NEXT: srai a1, a1, 7
35+
; RV64I-NEXT: srai a1, a1, 63
3936
; RV64I-NEXT: addw a0, a0, a1
4037
; RV64I-NEXT: xor a0, a0, a1
4138
; RV64I-NEXT: ret
@@ -50,13 +47,11 @@ define i8 @abs8(i8 %x) {
5047
ret i8 %abs
5148
}
5249

53-
; FIXME: Could combine back to back srais.
5450
define i16 @abs16(i16 %x) {
5551
; RV32I-LABEL: abs16:
5652
; RV32I: # %bb.0:
5753
; RV32I-NEXT: slli a1, a0, 16
58-
; RV32I-NEXT: srai a1, a1, 16
59-
; RV32I-NEXT: srai a1, a1, 15
54+
; RV32I-NEXT: srai a1, a1, 31
6055
; RV32I-NEXT: add a0, a0, a1
6156
; RV32I-NEXT: xor a0, a0, a1
6257
; RV32I-NEXT: ret
@@ -71,8 +66,7 @@ define i16 @abs16(i16 %x) {
7166
; RV64I-LABEL: abs16:
7267
; RV64I: # %bb.0:
7368
; RV64I-NEXT: slli a1, a0, 48
74-
; RV64I-NEXT: srai a1, a1, 48
75-
; RV64I-NEXT: srai a1, a1, 15
69+
; RV64I-NEXT: srai a1, a1, 63
7670
; RV64I-NEXT: addw a0, a0, a1
7771
; RV64I-NEXT: xor a0, a0, a1
7872
; RV64I-NEXT: ret

llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb-zbkb.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -335,13 +335,11 @@ define i8 @srli_i8(i8 %a) nounwind {
335335
}
336336

337337
; FIXME: We should use slli+srai with Zbb for better compression.
338-
; FIXME: We should combine back to back srai.
339338
define i8 @srai_i8(i8 %a) nounwind {
340339
; RV32I-LABEL: srai_i8:
341340
; RV32I: # %bb.0:
342341
; RV32I-NEXT: slli a0, a0, 24
343-
; RV32I-NEXT: srai a0, a0, 24
344-
; RV32I-NEXT: srai a0, a0, 5
342+
; RV32I-NEXT: srai a0, a0, 29
345343
; RV32I-NEXT: ret
346344
;
347345
; RV32ZBB-LABEL: srai_i8:
@@ -353,8 +351,7 @@ define i8 @srai_i8(i8 %a) nounwind {
353351
; RV32ZBKB-LABEL: srai_i8:
354352
; RV32ZBKB: # %bb.0:
355353
; RV32ZBKB-NEXT: slli a0, a0, 24
356-
; RV32ZBKB-NEXT: srai a0, a0, 24
357-
; RV32ZBKB-NEXT: srai a0, a0, 5
354+
; RV32ZBKB-NEXT: srai a0, a0, 29
358355
; RV32ZBKB-NEXT: ret
359356
%1 = ashr i8 %a, 5
360357
ret i8 %1
@@ -380,13 +377,11 @@ define i16 @srli_i16(i16 %a) nounwind {
380377
}
381378

382379
; FIXME: We should use slli+srai with Zbb/Zbkb for better compression.
383-
; FIXME: We should combine back to back sraiw.
384380
define i16 @srai_i16(i16 %a) nounwind {
385381
; RV32I-LABEL: srai_i16:
386382
; RV32I: # %bb.0:
387383
; RV32I-NEXT: slli a0, a0, 16
388-
; RV32I-NEXT: srai a0, a0, 16
389-
; RV32I-NEXT: srai a0, a0, 9
384+
; RV32I-NEXT: srai a0, a0, 25
390385
; RV32I-NEXT: ret
391386
;
392387
; RV32ZBB-LABEL: srai_i16:
@@ -398,8 +393,7 @@ define i16 @srai_i16(i16 %a) nounwind {
398393
; RV32ZBKB-LABEL: srai_i16:
399394
; RV32ZBKB: # %bb.0:
400395
; RV32ZBKB-NEXT: slli a0, a0, 16
401-
; RV32ZBKB-NEXT: srai a0, a0, 16
402-
; RV32ZBKB-NEXT: srai a0, a0, 9
396+
; RV32ZBKB-NEXT: srai a0, a0, 25
403397
; RV32ZBKB-NEXT: ret
404398
%1 = ashr i16 %a, 9
405399
ret i16 %1

llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -620,9 +620,9 @@ define i32 @sextb_i32(i32 %a) nounwind {
620620
define i64 @sextb_i64(i64 %a) nounwind {
621621
; RV32I-LABEL: sextb_i64:
622622
; RV32I: # %bb.0:
623-
; RV32I-NEXT: slli a0, a0, 24
624-
; RV32I-NEXT: srai a0, a0, 24
625-
; RV32I-NEXT: srai a1, a0, 31
623+
; RV32I-NEXT: slli a1, a0, 24
624+
; RV32I-NEXT: srai a0, a1, 24
625+
; RV32I-NEXT: srai a1, a1, 31
626626
; RV32I-NEXT: ret
627627
;
628628
; RV32ZBB-LABEL: sextb_i64:
@@ -655,9 +655,9 @@ define i32 @sexth_i32(i32 %a) nounwind {
655655
define i64 @sexth_i64(i64 %a) nounwind {
656656
; RV32I-LABEL: sexth_i64:
657657
; RV32I: # %bb.0:
658-
; RV32I-NEXT: slli a0, a0, 16
659-
; RV32I-NEXT: srai a0, a0, 16
660-
; RV32I-NEXT: srai a1, a0, 31
658+
; RV32I-NEXT: slli a1, a0, 16
659+
; RV32I-NEXT: srai a0, a1, 16
660+
; RV32I-NEXT: srai a1, a1, 31
661661
; RV32I-NEXT: ret
662662
;
663663
; RV32ZBB-LABEL: sexth_i64:

llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,6 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
433433
ret i64 %1
434434
}
435435

436-
; FIXME: We should use srli instead of srliw for better compression.
437436
define i8 @srli_i8(i8 %a) nounwind {
438437
; CHECK-LABEL: srli_i8:
439438
; CHECK: # %bb.0:
@@ -445,13 +444,11 @@ define i8 @srli_i8(i8 %a) nounwind {
445444
}
446445

447446
; FIXME: We should use slli+srai with Zbb for better compression.
448-
; FIXME: We should combine back to back sraiw.
449447
define i8 @srai_i8(i8 %a) nounwind {
450448
; RV64I-LABEL: srai_i8:
451449
; RV64I: # %bb.0:
452450
; RV64I-NEXT: slli a0, a0, 56
453-
; RV64I-NEXT: srai a0, a0, 56
454-
; RV64I-NEXT: srai a0, a0, 5
451+
; RV64I-NEXT: srai a0, a0, 61
455452
; RV64I-NEXT: ret
456453
;
457454
; RV64ZBB-LABEL: srai_i8:
@@ -463,8 +460,7 @@ define i8 @srai_i8(i8 %a) nounwind {
463460
; RV64ZBKB-LABEL: srai_i8:
464461
; RV64ZBKB: # %bb.0:
465462
; RV64ZBKB-NEXT: slli a0, a0, 56
466-
; RV64ZBKB-NEXT: srai a0, a0, 56
467-
; RV64ZBKB-NEXT: srai a0, a0, 5
463+
; RV64ZBKB-NEXT: srai a0, a0, 61
468464
; RV64ZBKB-NEXT: ret
469465
%1 = ashr i8 %a, 5
470466
ret i8 %1
@@ -490,13 +486,11 @@ define i16 @srli_i16(i16 %a) nounwind {
490486
}
491487

492488
; FIXME: We should use slli+srai with Zbb for better compression.
493-
; FIXME: We should combine back to back srai.
494489
define i16 @srai_i16(i16 %a) nounwind {
495490
; RV64I-LABEL: srai_i16:
496491
; RV64I: # %bb.0:
497492
; RV64I-NEXT: slli a0, a0, 48
498-
; RV64I-NEXT: srai a0, a0, 48
499-
; RV64I-NEXT: srai a0, a0, 9
493+
; RV64I-NEXT: srai a0, a0, 57
500494
; RV64I-NEXT: ret
501495
;
502496
; RV64ZBB-LABEL: srai_i16:
@@ -508,8 +502,7 @@ define i16 @srai_i16(i16 %a) nounwind {
508502
; RV64ZBKB-LABEL: srai_i16:
509503
; RV64ZBKB: # %bb.0:
510504
; RV64ZBKB-NEXT: slli a0, a0, 48
511-
; RV64ZBKB-NEXT: srai a0, a0, 48
512-
; RV64ZBKB-NEXT: srai a0, a0, 9
505+
; RV64ZBKB-NEXT: srai a0, a0, 57
513506
; RV64ZBKB-NEXT: ret
514507
%1 = ashr i16 %a, 9
515508
ret i16 %1

llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -268,16 +268,19 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
268268
define i32 @ctlz_lshr_i32(i32 signext %a) {
269269
; RV64I-LABEL: ctlz_lshr_i32:
270270
; RV64I: # %bb.0:
271-
; RV64I-NEXT: srliw a0, a0, 1
272-
; RV64I-NEXT: beqz a0, .LBB4_2
271+
; RV64I-NEXT: srliw a1, a0, 1
272+
; RV64I-NEXT: beqz a1, .LBB4_2
273273
; RV64I-NEXT: # %bb.1: # %cond.false
274274
; RV64I-NEXT: addi sp, sp, -16
275275
; RV64I-NEXT: .cfi_def_cfa_offset 16
276276
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
277277
; RV64I-NEXT: .cfi_offset ra, -8
278-
; RV64I-NEXT: srli a1, a0, 1
279-
; RV64I-NEXT: or a0, a0, a1
280-
; RV64I-NEXT: srliw a1, a0, 2
278+
; RV64I-NEXT: li a2, 2
279+
; RV64I-NEXT: srliw a0, a0, 2
280+
; RV64I-NEXT: or a0, a1, a0
281+
; RV64I-NEXT: slli a1, a0, 32
282+
; RV64I-NEXT: srli a1, a1, 32
283+
; RV64I-NEXT: srl a1, a1, a2
281284
; RV64I-NEXT: or a0, a0, a1
282285
; RV64I-NEXT: srliw a1, a0, 4
283286
; RV64I-NEXT: or a0, a0, a1
@@ -286,11 +289,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
286289
; RV64I-NEXT: srliw a1, a0, 16
287290
; RV64I-NEXT: or a0, a0, a1
288291
; RV64I-NEXT: srliw a1, a0, 1
289-
; RV64I-NEXT: lui a2, 349525
290-
; RV64I-NEXT: addi a2, a2, 1365
291-
; RV64I-NEXT: and a1, a1, a2
292+
; RV64I-NEXT: lui a3, 349525
293+
; RV64I-NEXT: addi a3, a3, 1365
294+
; RV64I-NEXT: and a1, a1, a3
292295
; RV64I-NEXT: subw a0, a0, a1
293-
; RV64I-NEXT: srliw a1, a0, 2
296+
; RV64I-NEXT: slli a1, a0, 32
297+
; RV64I-NEXT: srli a1, a1, 32
298+
; RV64I-NEXT: srl a1, a1, a2
294299
; RV64I-NEXT: lui a2, 209715
295300
; RV64I-NEXT: addi a2, a2, 819
296301
; RV64I-NEXT: and a1, a1, a2

llvm/test/Other/new-pm-pgo-O0.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@
99
; RUN: |FileCheck %s --check-prefixes=USE_POST_LINK,USE
1010
; RUN: opt -debug-pass-manager -passes='lto<O0>' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 \
1111
; RUN: |FileCheck %s --check-prefixes=USE_POST_LINK,USE
12+
; RUN: opt -debug-pass-manager -passes='default<O0>' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \
13+
; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE
1214

13-
;
1415
; GEN: Running pass: PGOInstrumentationGen
1516
; USE_DEFAULT: Running pass: PGOInstrumentationUse
1617
; USE_PRE_LINK: Running pass: PGOInstrumentationUse
1718
; USE_POST_LINK-NOT: Running pass: PGOInstrumentationUse
1819
; USE-NOT: Running pass: PGOIndirectCallPromotion
1920
; USE-NOT: Running pass: PGOMemOPSizeOpt
2021

22+
; SAMPLE_USE: Running pass: AddDiscriminatorsPass
23+
; SAMPLE_USE: Running pass: SampleProfileLoaderPass
24+
2125
define void @foo() {
2226
ret void
2327
}

0 commit comments

Comments
 (0)