Skip to content

Commit a14749c

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-induction-resume-values
2 parents c98b6d3 + 3654183 commit a14749c

File tree

5 files changed

+148
-43
lines changed

5 files changed

+148
-43
lines changed

libc/src/math/generic/log1p.cpp

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -822,8 +822,8 @@ constexpr Float128 BIG_COEFFS[4]{
822822
{Sign::NEG, -128, 0x80000000'00000000'00000000'00000000_u128},
823823
};
824824

825-
LIBC_INLINE double log1p_accurate(int e_x, int index,
826-
fputil::DoubleDouble m_x) {
825+
[[maybe_unused]] LIBC_INLINE double log1p_accurate(int e_x, int index,
826+
fputil::DoubleDouble m_x) {
827827
Float128 e_x_f128(static_cast<float>(e_x));
828828
Float128 sum = fputil::quick_mul(LOG_2, e_x_f128);
829829
sum = fputil::quick_add(sum, LOG_R1[index]);
@@ -882,7 +882,6 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
882882

883883
constexpr int EXP_BIAS = FPBits_t::EXP_BIAS;
884884
constexpr int FRACTION_LEN = FPBits_t::FRACTION_LEN;
885-
constexpr uint64_t FRACTION_MASK = FPBits_t::FRACTION_MASK;
886885
FPBits_t xbits(x);
887886
uint64_t x_u = xbits.uintval();
888887

@@ -954,12 +953,12 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
954953
// |x_dd.lo| < ulp(x_dd.hi)
955954

956955
FPBits_t xhi_bits(x_dd.hi);
956+
uint64_t xhi_frac = xhi_bits.get_mantissa();
957957
x_u = xhi_bits.uintval();
958958
// Range reduction:
959959
// Find k such that |x_hi - k * 2^-7| <= 2^-8.
960-
int idx =
961-
static_cast<int>(((x_u & FRACTION_MASK) + (1ULL << (FRACTION_LEN - 8))) >>
962-
(FRACTION_LEN - 7));
960+
int idx = static_cast<int>((xhi_frac + (1ULL << (FRACTION_LEN - 8))) >>
961+
(FRACTION_LEN - 7));
963962
int x_e = xhi_bits.get_exponent() + (idx >> 7);
964963
double e_x = static_cast<double>(x_e);
965964

@@ -974,17 +973,21 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
974973
constexpr double ERR_HI[2] = {0x1.0p-85, 0.0};
975974
double err_hi = ERR_HI[hi == 0.0];
976975

977-
// Scaling factior = 2^(-xh_bits.get_exponent())
978-
uint64_t s_u = (static_cast<uint64_t>(EXP_BIAS) << (FRACTION_LEN + 1)) -
979-
(x_u & FPBits_t::EXP_MASK);
980-
// When the exponent of x is 2^1023, its inverse, 2^(-1023), is subnormal.
981-
const double EXPONENT_CORRECTION[2] = {0.0, 0x1.0p-1023};
982-
double scaling = FPBits_t(s_u).get_val() + EXPONENT_CORRECTION[s_u == 0];
976+
// Scale x_dd by 2^(-xhi_bits.get_exponent()).
977+
int64_t s_u = static_cast<int64_t>(x_u & FPBits_t::EXP_MASK) -
978+
(static_cast<int64_t>(EXP_BIAS) << FRACTION_LEN);
983979
// Normalize arguments:
984980
// 1 <= m_dd.hi < 2
985981
// |m_dd.lo| < 2^-52.
986982
// This is exact.
987-
fputil::DoubleDouble m_dd{scaling * x_dd.lo, scaling * x_dd.hi};
983+
uint64_t m_hi = FPBits_t::one().uintval() | xhi_frac;
984+
985+
uint64_t m_lo =
986+
FPBits_t(x_dd.lo).abs().get_val() > x_dd.hi * 0x1.0p-127
987+
? static_cast<uint64_t>(cpp::bit_cast<int64_t>(x_dd.lo) - s_u)
988+
: 0;
989+
990+
fputil::DoubleDouble m_dd{FPBits_t(m_lo).get_val(), FPBits_t(m_hi).get_val()};
988991

989992
// Perform range reduction:
990993
// r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1

libc/test/src/math/smoke/log1p_test.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
#include "test/UnitTest/FPMatcher.h"
1414
#include "test/UnitTest/Test.h"
1515

16-
#include <stdint.h>
17-
1816
using LlvmLibcLog1pTest = LIBC_NAMESPACE::testing::FPTest<double>;
1917

2018
TEST_F(LlvmLibcLog1pTest, SpecialNumbers) {
@@ -26,6 +24,9 @@ TEST_F(LlvmLibcLog1pTest, SpecialNumbers) {
2624
EXPECT_FP_EQ(neg_zero, LIBC_NAMESPACE::log1p(-0.0));
2725
EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::log1p(-1.0),
2826
FE_DIVBYZERO);
27+
28+
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
29+
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
2930
}
3031

3132
#ifdef LIBC_TEST_FTZ_DAZ
@@ -36,18 +37,24 @@ TEST_F(LlvmLibcLog1pTest, FTZMode) {
3637
ModifyMXCSR mxcsr(FTZ);
3738

3839
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
40+
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
41+
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
3942
}
4043

4144
TEST_F(LlvmLibcLog1pTest, DAZMode) {
4245
ModifyMXCSR mxcsr(DAZ);
4346

4447
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
48+
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
49+
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
4550
}
4651

4752
TEST_F(LlvmLibcLog1pTest, FTZDAZMode) {
4853
ModifyMXCSR mxcsr(FTZ | DAZ);
4954

5055
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
56+
EXPECT_FP_EQ(0x1.62c829bf8fd9dp9,
57+
LIBC_NAMESPACE::log1p(0x1.9b536cac3a09dp1023));
5158
}
5259

5360
#endif

llvm/lib/Target/X86/X86InstrUtils.td

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,8 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
193193
bit HasREX_W = hasREX_W;
194194
}
195195

196-
def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
197-
198196
def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem, Imm8, i8imm,
199-
imm_su, imm, i8imm, invalid_node, invalid_node,
197+
imm_su, imm, i8imm, null_frag, null_frag,
200198
1, 0>;
201199
def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, Imm16, i16imm,
202200
imm_su, imm, i16i8imm, i16immSExt8_su, i16immSExt8,

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ cl::opt<bool> EnableMemProfContextDisambiguation(
132132
cl::opt<bool> SupportsHotColdNew(
133133
"supports-hot-cold-new", cl::init(false), cl::Hidden,
134134
cl::desc("Linking with hot/cold operator new interfaces"));
135+
136+
cl::opt<bool> MemProfRequireDefinitionForPromotion(
137+
"memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
138+
cl::desc(
139+
"Require target function definition when promoting indirect calls"));
135140
} // namespace llvm
136141

137142
extern cl::opt<bool> MemProfReportHintedSizes;
@@ -4602,7 +4607,13 @@ void MemProfContextDisambiguation::performICP(
46024607
// target (or version of the code), and we need to be conservative
46034608
// (similar to what is done in the ICP pass).
46044609
Function *TargetFunction = Symtab->getFunction(Candidate.Value);
4605-
if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
4610+
if (TargetFunction == nullptr ||
4611+
// Any ThinLTO global dead symbol removal should have already
4612+
// occurred, so it should be safe to promote when the target is a
4613+
// declaration.
4614+
// TODO: Remove internal option once more fully tested.
4615+
(MemProfRequireDefinitionForPromotion &&
4616+
TargetFunction->isDeclaration())) {
46064617
ORE.emit([&]() {
46074618
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", CB)
46084619
<< "Memprof cannot promote indirect call: target with md5sum "

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 110 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@
9393
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
9494
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \
9595
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
96+
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
97+
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
9698
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
9799
; RUN: -r=%t/main.o,_Znwm, \
98100
; RUN: -r=%t/main.o,_ZdlPvm, \
@@ -113,9 +115,9 @@
113115
; RUN: -pass-remarks=. -save-temps \
114116
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
115117
; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS-MAIN \
116-
; RUN: --check-prefix=REMARKS-FOO
118+
; RUN: --check-prefix=REMARKS-FOO --check-prefix=REMARKS-FOO-IMPORT
117119

118-
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR
120+
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR --check-prefix=IR-IMPORT
119121

120122
;; Try again but with distributed ThinLTO
121123
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
@@ -124,6 +126,8 @@
124126
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
125127
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \
126128
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
129+
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
130+
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
127131
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
128132
; RUN: -r=%t/main.o,_Znwm, \
129133
; RUN: -r=%t/main.o,_ZdlPvm, \
@@ -147,8 +151,9 @@
147151
; RUN: -enable-memprof-indirect-call-support=true \
148152
; RUN: -summary-file=%t/foo.o.thinlto.bc -memprof-import-summary=%t/foo.o.thinlto.bc \
149153
; RUN: -enable-import-metadata -stats -pass-remarks=. \
150-
; RUN: %t/foo.o -S 2>&1 | FileCheck %s --check-prefix=IR \
151-
; RUN: --check-prefix=STATS-BE-DISTRIB --check-prefix=REMARKS-FOO
154+
; RUN: %t/foo.o -S 2>&1 | FileCheck %s --check-prefix=IR --check-prefix=IR-IMPORT \
155+
; RUN: --check-prefix=STATS-BE-DISTRIB --check-prefix=REMARKS-FOO \
156+
; RUN: --check-prefix=REMARKS-FOO-IMPORT
152157

153158
;; Retry with the ICP-disabled object file, and make sure we disable it again
154159
;; so we don't look for the synthesized callsite records when applying imports.
@@ -159,6 +164,8 @@
159164
; RUN: -r=%t/foo.noicp.o,_Z3fooR2B0j,plx \
160165
; RUN: -r=%t/foo.noicp.o,_ZN2B03barEj.abc,plx \
161166
; RUN: -r=%t/foo.noicp.o,_Z3xyzR2B0j, \
167+
; RUN: -r=%t/foo.noicp.o,_ZN2B03barEj, \
168+
; RUN: -r=%t/foo.noicp.o,_ZN1B3barEj, \
162169
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
163170
; RUN: -r=%t/main.o,_Znwm, \
164171
; RUN: -r=%t/main.o,_ZdlPvm, \
@@ -184,6 +191,74 @@
184191
;; metadata.
185192
; RUN: llvm-dis %t.noicp.out.2.4.opt.bc -o - | FileCheck %s --implicit-check-not "_Z3fooR2B0j.memprof" --implicit-check-not "!callsite"
186193

194+
;; Run in-process ThinLTO again, but with importing disabled by setting the
195+
;; instruction limit to 0. Ensure that the existing declarations of B::bar
196+
;; and B0::bar are sufficient to allow for the promotion and cloning.
197+
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
198+
; RUN: -import-instr-limit=0 \
199+
; RUN: -enable-memprof-indirect-call-support=true \
200+
; RUN: -supports-hot-cold-new \
201+
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
202+
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \
203+
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
204+
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
205+
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
206+
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
207+
; RUN: -r=%t/main.o,_Znwm, \
208+
; RUN: -r=%t/main.o,_ZdlPvm, \
209+
; RUN: -r=%t/main.o,_Z8externalPi, \
210+
; RUN: -r=%t/main.o,main,plx \
211+
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
212+
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
213+
; RUN: -r=%t/main.o,_ZTV1B,plx \
214+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
215+
; RUN: -r=%t/main.o,_ZTS1B,plx \
216+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
217+
; RUN: -r=%t/main.o,_ZTS2B0,plx \
218+
; RUN: -r=%t/main.o,_ZTI2B0,plx \
219+
; RUN: -r=%t/main.o,_ZTI1B,plx \
220+
; RUN: -r=%t/main.o,_ZTV2B0,plx \
221+
; RUN: -thinlto-threads=1 \
222+
; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \
223+
; RUN: -pass-remarks=. -save-temps \
224+
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
225+
; RUN: --check-prefix=STATS-BE-NOIMPORT --check-prefix=REMARKS-MAIN \
226+
; RUN: --check-prefix=REMARKS-FOO
227+
228+
; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR --check-prefix=IR-NOIMPORT
229+
230+
;; Run it again but with -memprof-require-definition-for-promotion, and confirm
231+
;; that no promotions occur.
232+
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
233+
; RUN: -import-instr-limit=0 \
234+
; RUN: -memprof-require-definition-for-promotion \
235+
; RUN: -enable-memprof-indirect-call-support=true \
236+
; RUN: -supports-hot-cold-new \
237+
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
238+
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \
239+
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
240+
; RUN: -r=%t/foo.o,_ZN2B03barEj, \
241+
; RUN: -r=%t/foo.o,_ZN1B3barEj, \
242+
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
243+
; RUN: -r=%t/main.o,_Znwm, \
244+
; RUN: -r=%t/main.o,_ZdlPvm, \
245+
; RUN: -r=%t/main.o,_Z8externalPi, \
246+
; RUN: -r=%t/main.o,main,plx \
247+
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
248+
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
249+
; RUN: -r=%t/main.o,_ZTV1B,plx \
250+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
251+
; RUN: -r=%t/main.o,_ZTS1B,plx \
252+
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
253+
; RUN: -r=%t/main.o,_ZTS2B0,plx \
254+
; RUN: -r=%t/main.o,_ZTI2B0,plx \
255+
; RUN: -r=%t/main.o,_ZTI1B,plx \
256+
; RUN: -r=%t/main.o,_ZTV2B0,plx \
257+
; RUN: -thinlto-threads=1 \
258+
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
259+
; RUN: -pass-remarks=. \
260+
; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not Promote
261+
187262
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
188263
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
189264
; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1
@@ -208,51 +283,59 @@
208283
; REMARKS-FOO: call in clone _Z3fooR2B0j promoted and assigned to call function clone _ZN2B03barEj
209284
; REMARKS-FOO: Promote indirect call to _ZN2B03barEj with count 2 out of 2
210285
; REMARKS-FOO: call in clone _Z3fooR2B0j.memprof.1 promoted and assigned to call function clone _ZN2B03barEj.memprof.1
211-
; REMARKS-FOO: created clone _ZN2B03barEj.memprof.1
212-
; REMARKS-FOO: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold
213-
; REMARKS-FOO: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold
214-
; REMARKS-FOO: created clone _ZN1B3barEj.memprof.1
215-
; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
216-
; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
286+
; REMARKS-FOO-IMPORT: created clone _ZN2B03barEj.memprof.1
287+
; REMARKS-FOO-IMPORT: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold
288+
; REMARKS-FOO-IMPORT: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold
289+
; REMARKS-FOO-IMPORT: created clone _ZN1B3barEj.memprof.1
290+
; REMARKS-FOO-IMPORT: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
291+
; REMARKS-FOO-IMPORT: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
217292

218293
; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis
219294
; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
295+
; STATS-BE-NOIMPORT: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
220296
; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis
221297
; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
298+
; STATS-BE-NOIMPORT: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
222299
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
223300
; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
301+
; STATS-BE-NOIMPORT: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
224302

303+
; IR-NOIMPORT: foo
225304
; IR: define {{.*}} @_Z3fooR2B0j(
226-
; IR: %1 = icmp eq ptr %0, @_ZN1B3barEj
227-
; IR: br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect
305+
; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
306+
; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect
228307
; IR: if.true.direct_targ:
229-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
308+
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
309+
; IR-NOIMPORT: call {{.*}} @_ZN1B3barEj(
230310
; IR: if.false.orig_indirect:
231-
; IR: %2 = icmp eq ptr %0, @_ZN2B03barEj
232-
; IR: br i1 %2, label %if.true.direct_targ1, label %if.false.orig_indirect2
311+
; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
312+
; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2
233313
; IR: if.true.direct_targ1:
234-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
314+
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
315+
; IR-NOIMPORT: call {{.*}} @_ZN2B03barEj(
235316
; IR: if.false.orig_indirect2:
236317
; IR: call {{.*}} %0
237318

238319
; IR: define {{.*}} @_Z3fooR2B0j.memprof.1(
239320
;; We should still compare against the original versions of bar since that is
240321
;; what is in the vtable. However, we should have called the cloned versions
241322
;; that perform cold allocations, which were subsequently inlined.
242-
; IR: %1 = icmp eq ptr %0, @_ZN1B3barEj
243-
; IR: br i1 %1, label %if.true.direct_targ, label %if.false.orig_indirect
323+
; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
324+
; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect
244325
; IR: if.true.direct_targ:
245-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
326+
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
327+
; IR-NOIMPORT: call {{.*}} @_ZN1B3barEj.memprof.1(
246328
; IR: if.false.orig_indirect:
247-
; IR: %2 = icmp eq ptr %0, @_ZN2B03barEj
248-
; IR: br i1 %2, label %if.true.direct_targ1, label %if.false.orig_indirect2
329+
; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
330+
; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2
249331
; IR: if.true.direct_targ1:
250-
; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
332+
; IR-IMPORT: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
333+
; IR-NOIMPORT: call {{.*}} @_ZN2B03barEj.memprof.1(
251334
; IR: if.false.orig_indirect2:
252335
; IR: call {{.*}} %0
253336

254-
; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
255-
; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
337+
; IR-IMPORT: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
338+
; IR-IMPORT: attributes #[[COLD]] = {{.*}} "memprof"="cold"
256339

257340
; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
258341
; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
@@ -272,6 +355,9 @@ define i32 @_ZN2B03barEj.abc(ptr %this, i32 %s) {
272355
ret i32 0
273356
}
274357

358+
declare i32 @_ZN2B03barEj(ptr %this, i32 %s)
359+
declare i32 @_ZN1B3barEj(ptr %this, i32 %s)
360+
275361
define i32 @_Z3fooR2B0j(ptr %b) {
276362
entry:
277363
%0 = load ptr, ptr %b, align 8

0 commit comments

Comments
 (0)