diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index d7d809dfdd5f6..2a6bda839d36e 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -181,24 +181,52 @@ static cl::opt SampledInstr("sampled-instrumentation", cl::ZeroOrMore, static cl::opt SampledInstrPeriod( "sampled-instr-period", - cl::desc("Set the profile instrumentation sample period. For each sample " - "period, a fixed number of consecutive samples will be recorded. " - "The number is controlled by 'sampled-instr-burst-duration' flag. " - "The default sample period of 65535 is optimized for generating " - "efficient code that leverages unsigned integer wrapping in " - "overflow."), - cl::init(65535)); + cl::desc("Set the profile instrumentation sample period. A sample period " + "of 0 is invalid. For each sample period, a fixed number of " + "consecutive samples will be recorded. The number is controlled " + "by 'sampled-instr-burst-duration' flag. The default sample " + "period of 65536 is optimized for generating efficient code that " + "leverages unsigned short integer wrapping in overflow, but this " + "is disabled under simple sampling (burst duration = 1)."), + cl::init(USHRT_MAX + 1)); static cl::opt SampledInstrBurstDuration( "sampled-instr-burst-duration", cl::desc("Set the profile instrumentation burst duration, which can range " - "from 0 to one less than the value of 'sampled-instr-period'. " + "from 1 to the value of 'sampled-instr-period' (0 is invalid). " "This number of samples will be recorded for each " - "'sampled-instr-period' count update. Setting to 1 enables " - "simple sampling, in which case it is recommended to set " + "'sampled-instr-period' count update. Setting to 1 enables simple " + "sampling, in which case it is recommended to set " "'sampled-instr-period' to a prime number."), cl::init(200)); +struct SampledInstrumentationConfig { + unsigned BurstDuration; + unsigned Period; + bool UseShort; + bool IsSimpleSampling; + bool IsFastSampling; +}; + +static SampledInstrumentationConfig getSampledInstrumentationConfig() { + SampledInstrumentationConfig config; + config.BurstDuration = SampledInstrBurstDuration.getValue(); + config.Period = SampledInstrPeriod.getValue(); + if (config.BurstDuration > config.Period) + report_fatal_error( + "SampledBurstDuration must be less than or equal to SampledPeriod"); + if (config.Period == 0 || config.BurstDuration == 0) + report_fatal_error( + "SampledPeriod and SampledBurstDuration must be greater than 0"); + config.IsSimpleSampling = (config.BurstDuration == 1); + // If (BurstDuration == 1 && Period == 65536), generate the simple sampling + // style code. + config.IsFastSampling = + (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1); + config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling; + return config; +} + using LoadStorePair = std::pair; static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) { @@ -665,7 +693,7 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, // (1) Full burst sampling: We transform: // Increment_Instruction; // to: -// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { +// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) { // Increment_Instruction; // } // __llvm_profile_sampling__ += 1; @@ -680,14 +708,14 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, // "__llvm_profile_sampling__" variable is an unsigned type, meaning it will // wrap around to zero when overflows. In this case, the second check is // unnecessary, so we won't generate check2 when the SampledInstrPeriod is -// set to 65535 (64K - 1). The code after: -// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { +// set to 65536 (64K). The code after: +// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) { // Increment_Instruction; // } // __llvm_profile_sampling__ += 1; // // (3) Simple sampling: -// When SampledInstrBurstDuration sets to 1, we do a simple sampling: +// When SampledInstrBurstDuration is set to 1, we do a simple sampling: // __llvm_profile_sampling__ += 1; // if (__llvm_profile_sampling__ >= SampledInstrPeriod) { // __llvm_profile_sampling__ = 0; @@ -706,27 +734,16 @@ void InstrLowerer::doSampling(Instruction *I) { if (!isSamplingEnabled()) return; - unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue(); - unsigned SampledPeriod = SampledInstrPeriod.getValue(); - if (SampledBurstDuration >= SampledPeriod) { - report_fatal_error( - "SampledPeriod needs to be greater than SampledBurstDuration"); - } - bool UseShort = (SampledPeriod <= USHRT_MAX); - bool IsSimpleSampling = (SampledBurstDuration == 1); - // If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate - // the simple sampling style code. - bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535); - - auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) { - if (UseShort) + SampledInstrumentationConfig config = getSampledInstrumentationConfig(); + auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) { + if (config.UseShort) return Builder.getInt16(C); else return Builder.getInt32(C); }; IntegerType *SamplingVarTy; - if (UseShort) + if (config.UseShort) SamplingVarTy = Type::getInt16Ty(M.getContext()); else SamplingVarTy = Type::getInt32Ty(M.getContext()); @@ -741,18 +758,18 @@ void InstrLowerer::doSampling(Instruction *I) { MDNode *BranchWeight; IRBuilder<> CondBuilder(I); auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar); - if (IsSimpleSampling) { + if (config.IsSimpleSampling) { // For the simple sampling, just create the load and increments. IRBuilder<> IncBuilder(I); NewSamplingVarVal = IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1)); SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar); } else { - // For the bust-sampling, create the conditonal update. + // For the burst-sampling, create the conditional update. auto *DurationCond = CondBuilder.CreateICmpULE( - LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration)); + LoadSamplingVar, GetConstant(CondBuilder, config.BurstDuration - 1)); BranchWeight = MDB.createBranchWeights( - SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration); + config.BurstDuration, config.Period - config.BurstDuration); Instruction *ThenTerm = SplitBlockAndInsertIfThen( DurationCond, I, /* Unreachable */ false, BranchWeight); IRBuilder<> IncBuilder(I); @@ -762,20 +779,20 @@ void InstrLowerer::doSampling(Instruction *I) { I->moveBefore(ThenTerm); } - if (IsFastSampling) + if (config.IsFastSampling) return; - // Create the condtion for checking the period. + // Create the condition for checking the period. Instruction *ThenTerm, *ElseTerm; IRBuilder<> PeriodCondBuilder(SamplingVarIncr); auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE( - NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod)); - BranchWeight = MDB.createBranchWeights(1, SampledPeriod); + NewSamplingVarVal, GetConstant(PeriodCondBuilder, config.Period)); + BranchWeight = MDB.createBranchWeights(1, config.Period - 1); SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm, &ElseTerm, BranchWeight); // For the simple sampling, the counter update happens in sampling var reset. - if (IsSimpleSampling) + if (config.IsSimpleSampling) I->moveBefore(ThenTerm); IRBuilder<> ResetBuilder(ThenTerm); @@ -2138,7 +2155,7 @@ void createProfileSamplingVar(Module &M) { const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); IntegerType *SamplingVarTy; Constant *ValueZero; - if (SampledInstrPeriod.getValue() <= USHRT_MAX) { + if (getSampledInstrumentationConfig().UseShort) { SamplingVarTy = Type::getInt16Ty(M.getContext()); ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0)); } else { diff --git a/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll b/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll index 9d083fe04015e..43377f695be67 100644 --- a/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll +++ b/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll @@ -6,7 +6,7 @@ define void @foo(i32 %n, i32 %N) { ; SAMPLING-LABEL: @foo ; SAMPLING: %[[VV0:[0-9]+]] = load i16, ptr @__llvm_profile_sampling, align 2 -; SAMPLING: %[[VV1:[0-9]+]] = icmp ule i16 %[[VV0]], 200 +; SAMPLING: %[[VV1:[0-9]+]] = icmp ule i16 %[[VV0]], 199 ; SAMPLING: br i1 %[[VV1]], label {{.*}}, label {{.*}}, !prof !0 ; SAMPLING: {{.*}} = load {{.*}} @__profc_foo{{.*}} 3) ; SAMPLING-NEXT: add diff --git a/llvm/test/Transforms/PGOProfile/cspgo_sample.ll b/llvm/test/Transforms/PGOProfile/cspgo_sample.ll index 97ad4d00c9d9c..07f1e2d8a09ee 100644 --- a/llvm/test/Transforms/PGOProfile/cspgo_sample.ll +++ b/llvm/test/Transforms/PGOProfile/cspgo_sample.ll @@ -53,7 +53,7 @@ for.end: ; CSGEN-LABEL: @foo ; CSGEN: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201 +; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 200 ; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]] ; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1 ; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2 @@ -67,7 +67,7 @@ entry: } ; CSGEN-LABEL: @main ; CSGEN: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201 +; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 200 ; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]] ; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1 ; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2 diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll index dcc1e805ba6f6..56d8364d8f543 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s --passes=instrprof --sampled-instrumentation -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION,SAMPLE-WEIGHT ; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=100 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION100,SAMPLE-WEIGHT100 +; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=65536 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,UNSAMPLED-DURATION,UNSAMPLED-WEIGHT target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -23,8 +24,9 @@ define void @f() { ; SAMPLE-CODE-LABEL: @f( ; SAMPLE-CODE: entry: ; SAMPLE-CODE-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; SAMPLE-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 200 -; SAMPLE-DURATION100: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 100 +; SAMPLE-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 199 +; SAMPLE-DURATION100: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 99 +; UNSAMPLED-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], -1 ; SAMPLE-CODE: br i1 [[TMP1]], label %[[TMP2:.*]], label %[[TMP4:.*]], !prof !0 ; SAMPLE-CODE: [[TMP2]]: ; SAMPLE-CODE-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f @@ -43,5 +45,6 @@ entry: ; SAMPLE-WEIGHT: !0 = !{!"branch_weights", i32 200, i32 65336} ; SAMPLE-WEIGHT100: !0 = !{!"branch_weights", i32 100, i32 65436} +; UNSAMPLED-WEIGHT: !0 = !{!"branch_weights", i32 65536, i32 0} declare void @llvm.instrprof.increment(i8*, i64, i32, i32) diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll index 57d1a0cd33fbe..726df2886ca84 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=32 -S | FileCheck %s +; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=32 -S | FileCheck %s --check-prefixes=CHECK,CHECK-32 +; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=1009 -S | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAMPLED target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -13,7 +14,8 @@ define void @f() { ; CHECK-LABEL: define void @f() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 32 +; CHECK-32-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 31 +; CHECK-UNSAMPLED-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 1008 ; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]] ; CHECK: [[BB2]]: ; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 @@ -40,6 +42,8 @@ entry: declare void @llvm.instrprof.increment(i8*, i64, i32, i32) ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 32, i32 978} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1009} +; CHECK-32: [[PROF0]] = !{!"branch_weights", i32 32, i32 977} +; CHECK-32: [[PROF1]] = !{!"branch_weights", i32 1, i32 1008} +; CHECK-UNSAMPLED: [[PROF0]] = !{!"branch_weights", i32 1009, i32 0} +; CHECK-UNSAMPLED: [[PROF1]] = !{!"branch_weights", i32 1, i32 1008} ;. diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll index 1ad889524bc6a..2d6323c103471 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll @@ -13,7 +13,7 @@ define void @f() { ; CHECK-LABEL: define void @f() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__llvm_profile_sampling, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[TMP0]], 3000 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[TMP0]], 2999 ; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]] ; CHECK: [[BB2]]: ; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 @@ -40,6 +40,6 @@ entry: declare void @llvm.instrprof.increment(i8*, i64, i32, i32) ;. -; CHECK: [[PROF0]] = !{!"branch_weights", i32 3000, i32 997020} -; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1000019} +; CHECK: [[PROF0]] = !{!"branch_weights", i32 3000, i32 997019} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1000018} ;. diff --git a/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll index 8e846bbf1d982..5ef93af881c4b 100644 --- a/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll +++ b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll @@ -31,18 +31,18 @@ define void @f() { ; ; DEFAULTPERIOD-LABEL: define void @f() { ; DEFAULTPERIOD-NEXT: [[ENTRY:.*:]] -; DEFAULTPERIOD-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 -; DEFAULTPERIOD-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1 -; DEFAULTPERIOD-NEXT: [[TMP2:%.*]] = icmp uge i16 [[TMP1]], -1 +; DEFAULTPERIOD-NEXT: [[TMP0:%.*]] = load i32, ptr @__llvm_profile_sampling, align 4 +; DEFAULTPERIOD-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1 +; DEFAULTPERIOD-NEXT: [[TMP2:%.*]] = icmp uge i32 [[TMP1]], 65536 ; DEFAULTPERIOD-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB5:.*]], !prof [[PROF0:![0-9]+]] ; DEFAULTPERIOD: [[BB3]]: ; DEFAULTPERIOD-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 ; DEFAULTPERIOD-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT]], 1 ; DEFAULTPERIOD-NEXT: store i64 [[TMP4]], ptr @__profc_f, align 8 -; DEFAULTPERIOD-NEXT: store i16 0, ptr @__llvm_profile_sampling, align 2 +; DEFAULTPERIOD-NEXT: store i32 0, ptr @__llvm_profile_sampling, align 4 ; DEFAULTPERIOD-NEXT: br label %[[BB6:.*]] ; DEFAULTPERIOD: [[BB5]]: -; DEFAULTPERIOD-NEXT: store i16 [[TMP1]], ptr @__llvm_profile_sampling, align 2 +; DEFAULTPERIOD-NEXT: store i32 [[TMP1]], ptr @__llvm_profile_sampling, align 4 ; DEFAULTPERIOD-NEXT: br label %[[BB6]] ; DEFAULTPERIOD: [[BB6]]: ; DEFAULTPERIOD-NEXT: ret void @@ -54,7 +54,7 @@ entry: declare void @llvm.instrprof.increment(i8*, i64, i32, i32) ;. -; PERIOD1009: [[PROF0]] = !{!"branch_weights", i32 1, i32 1009} +; PERIOD1009: [[PROF0]] = !{!"branch_weights", i32 1, i32 1008} ;. ; DEFAULTPERIOD: [[PROF0]] = !{!"branch_weights", i32 1, i32 65535} ;.