Skip to content

Commit 078184a

Browse files
authored
Merge branch 'main' into main-merge-true16-vop3-mc-more-instructions-5
2 parents 38fe675 + bfd0510 commit 078184a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+2287
-736
lines changed

clang/lib/CodeGen/MicrosoftCXXABI.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3454,7 +3454,7 @@ llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion(
34543454
if (inheritanceModelHasOnlyOneField(IsFunc, DstInheritance)) {
34553455
Dst = FirstField;
34563456
} else {
3457-
Dst = llvm::UndefValue::get(ConvertMemberPointerType(DstTy));
3457+
Dst = llvm::PoisonValue::get(ConvertMemberPointerType(DstTy));
34583458
unsigned Idx = 0;
34593459
Dst = Builder.CreateInsertValue(Dst, FirstField, Idx++);
34603460
if (inheritanceModelHasNVOffsetField(IsFunc, DstInheritance))

clang/lib/Driver/SanitizerArgs.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1098,10 +1098,11 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
10981098
options::OPT_fno_sanitize_link_runtime, LinkRuntimes);
10991099

11001100
// Parse -link-cxx-sanitizer flag.
1101-
LinkCXXRuntimes = Args.hasArg(options::OPT_fsanitize_link_cxx_runtime,
1102-
options::OPT_fno_sanitize_link_cxx_runtime,
1103-
LinkCXXRuntimes) ||
1104-
D.CCCIsCXX();
1101+
LinkCXXRuntimes =
1102+
D.CCCIsCXX() && !Args.hasArg(clang::driver::options::OPT_nostdlibxx);
1103+
LinkCXXRuntimes =
1104+
Args.hasFlag(options::OPT_fsanitize_link_cxx_runtime,
1105+
options::OPT_fno_sanitize_link_cxx_runtime, LinkCXXRuntimes);
11051106

11061107
NeedsMemProfRt = Args.hasFlag(options::OPT_fmemory_profile,
11071108
options::OPT_fmemory_profile_EQ,

clang/test/CodeGenCXX/microsoft-abi-member-pointers.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -647,7 +647,7 @@ void (Multiple::*convertB2FuncToMultiple(void (B2::*mp)()))() {
647647
// CHECK: br i1 %{{.*}} label %{{.*}}, label %{{.*}}
648648
//
649649
// memptr.convert: ; preds = %entry
650-
// CHECK: insertvalue { ptr, i32 } undef, ptr %[[mp]], 0
650+
// CHECK: insertvalue { ptr, i32 } poison, ptr %[[mp]], 0
651651
// CHECK: insertvalue { ptr, i32 } %{{.*}}, i32 4, 1
652652
// CHECK: br label
653653
//
@@ -705,7 +705,7 @@ void (D::*convertCToD(void (C::*mp)()))() {
705705
// CHECK: %[[nv_adj:.*]] = select i1 %[[is_nvbase]], i32 %[[nv_disp]], i32 0
706706
// CHECK: %[[dst_adj:.*]] = select i1 %[[is_nvbase]], i32 4, i32 0
707707
// CHECK: %[[adj:.*]] = sub nsw i32 %[[nv_adj]], %[[dst_adj]]
708-
// CHECK: insertvalue { ptr, i32, i32 } undef, ptr {{.*}}, 0
708+
// CHECK: insertvalue { ptr, i32, i32 } poison, ptr {{.*}}, 0
709709
// CHECK: insertvalue { ptr, i32, i32 } {{.*}}, i32 %[[adj]], 1
710710
// CHECK: insertvalue { ptr, i32, i32 } {{.*}}, i32 {{.*}}, 2
711711
// CHECK: br label

clang/test/Driver/sanitizer-ld.c

Lines changed: 73 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -132,18 +132,81 @@
132132
// RUN: -resource-dir=%S/Inputs/empty_resource_dir \
133133
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
134134
// RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX-CXX %s
135-
//
136-
// CHECK-ASAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
135+
136+
// RUN: %clangxx -### %s 2>&1 \
137+
// RUN: --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform -fsanitize=address \
138+
// RUN: -resource-dir=%S/Inputs/empty_resource_dir \
139+
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
140+
// RUN: -fsanitize-link-c++-runtime \
141+
// RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX-CXX %s
142+
137143
// CHECK-ASAN-LINUX-CXX-NOT: "-lc"
138-
// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan.a" "--no-whole-archive"
139-
// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan_cxx.a" "--no-whole-archive"
140144
// CHECK-ASAN-LINUX-CXX-NOT: "--dynamic-list"
141-
// CHECK-ASAN-LINUX-CXX: "--export-dynamic"
142-
// CHECK-ASAN-LINUX-CXX: stdc++
143-
// CHECK-ASAN-LINUX-CXX: "-lpthread"
144-
// CHECK-ASAN-LINUX-CXX: "-lrt"
145-
// CHECK-ASAN-LINUX-CXX: "-ldl"
146-
// CHECK-ASAN-LINUX-CXX: "-lresolv"
145+
// CHECK-ASAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
146+
// CHECK-ASAN-LINUX-CXX-SAME: "--whole-archive" "{{.*}}libclang_rt.asan.a" "--no-whole-archive"
147+
// CHECK-ASAN-LINUX-CXX-SAME: "--whole-archive" "{{.*}}libclang_rt.asan_cxx.a" "--no-whole-archive"
148+
// CHECK-ASAN-LINUX-CXX-SAME: "--export-dynamic"
149+
// CHECK-ASAN-LINUX-CXX-SAME: stdc++
150+
// CHECK-ASAN-LINUX-CXX-SAME: "-lpthread"
151+
// CHECK-ASAN-LINUX-CXX-SAME: "-lrt"
152+
// CHECK-ASAN-LINUX-CXX-SAME: "-ldl"
153+
// CHECK-ASAN-LINUX-CXX-SAME: "-lresolv"
154+
155+
// RUN: %clang -### %s 2>&1 \
156+
// RUN: --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform -fsanitize=address \
157+
// RUN: -resource-dir=%S/Inputs/empty_resource_dir \
158+
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
159+
// RUN: -fno-sanitize-link-c++-runtime \
160+
// RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX-CNOCXX %s
161+
162+
// CHECK-ASAN-LINUX-CNOCXX-NOT: "-lc"
163+
// CHECK-ASAN-LINUX-CNOCXX-NOT: libclang_rt.asan_cxx
164+
// CHECK-ASAN-LINUX-CNOCXX-NOT: "--dynamic-list"
165+
// CHECK-ASAN-LINUX-CNOCXX-NOT: stdc++
166+
// CHECK-ASAN-LINUX-CNOCXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
167+
// CHECK-ASAN-LINUX-CNOCXX-SAME: "--whole-archive" "{{.*}}libclang_rt.asan.a" "--no-whole-archive"
168+
// CHECK-ASAN-LINUX-CNOCXX-SAME: "--export-dynamic"
169+
// CHECK-ASAN-LINUX-CNOCXX-SAME: "-lpthread"
170+
// CHECK-ASAN-LINUX-CNOCXX-SAME: "-lrt"
171+
// CHECK-ASAN-LINUX-CNOCXX-SAME: "-ldl"
172+
// CHECK-ASAN-LINUX-CNOCXX-SAME: "-lresolv"
173+
174+
// RUN: %clangxx -### %s 2>&1 \
175+
// RUN: --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform -fsanitize=address \
176+
// RUN: -resource-dir=%S/Inputs/empty_resource_dir \
177+
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
178+
// RUN: -fno-sanitize-link-c++-runtime \
179+
// RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX-NOCXX %s
180+
181+
// CHECK-ASAN-LINUX-NOCXX-NOT: "-lc"
182+
// CHECK-ASAN-LINUX-NOCXX-NOT: libclang_rt.asan_cxx
183+
// CHECK-ASAN-LINUX-NOCXX-NOT: "--dynamic-list"
184+
// CHECK-ASAN-LINUX-NOCXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
185+
// CHECK-ASAN-LINUX-NOCXX-SAME: "--whole-archive" "{{.*}}libclang_rt.asan.a" "--no-whole-archive"
186+
// CHECK-ASAN-LINUX-NOCXX-SAME: "--export-dynamic"
187+
// CHECK-ASAN-LINUX-NOCXX-SAME: stdc++
188+
// CHECK-ASAN-LINUX-NOCXX-SAME: "-lpthread"
189+
// CHECK-ASAN-LINUX-NOCXX-SAME: "-lrt"
190+
// CHECK-ASAN-LINUX-NOCXX-SAME: "-ldl"
191+
// CHECK-ASAN-LINUX-NOCXX-SAME: "-lresolv"
192+
193+
// RUN: %clangxx -### %s 2>&1 \
194+
// RUN: --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform -fsanitize=address \
195+
// RUN: -resource-dir=%S/Inputs/empty_resource_dir \
196+
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
197+
// RUN: -nostdlib++ \
198+
// RUN: | FileCheck --check-prefix=CHECK-ASAN-LINUX-NOSTDCXX %s
199+
200+
// CHECK-ASAN-LINUX-NOSTDCXX-NOT: "-lc"
201+
// CHECK-ASAN-LINUX-NOSTDCXX-NOT: libclang_rt.asan_cxx
202+
// CHECK-ASAN-LINUX-NOSTDCXX-NOT: "--dynamic-list"
203+
// CHECK-ASAN-LINUX-NOSTDCXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
204+
// CHECK-ASAN-LINUX-NOSTDCXX-SAME: "--whole-archive" "{{.*}}libclang_rt.asan.a" "--no-whole-archive"
205+
// CHECK-ASAN-LINUX-NOSTDCXX-SAME: "--export-dynamic"
206+
// CHECK-ASAN-LINUX-NOSTDCXX-SAME: "-lpthread"
207+
// CHECK-ASAN-LINUX-NOSTDCXX-SAME: "-lrt"
208+
// CHECK-ASAN-LINUX-NOSTDCXX-SAME: "-ldl"
209+
// CHECK-ASAN-LINUX-NOSTDCXX-SAME: "-lresolv"
147210

148211
// RUN: %clang -### %s -o /dev/null -fsanitize=address \
149212
// RUN: --target=i386-unknown-linux -fuse-ld=ld -stdlib=platform \

compiler-rt/test/hwasan/TestCases/sizes.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
// This test requires operator new to be intercepted by the hwasan runtime,
22
// so we need to avoid linking against libc++.
3-
// RUN: %clangxx_hwasan %s -nostdlib++ -lstdc++ -o %t || %clangxx_hwasan %s -o %t
3+
// RUN: %clangxx_hwasan %s -nostdlib++ -lstdc++ -fsanitize-link-c++-runtime -o %t || %clangxx_hwasan %s -o %t
44
// RUN: %env_hwasan_opts=allocator_may_return_null=0 not %run %t malloc 2>&1 | FileCheck %s --check-prefix=CHECK-max
55
// RUN: %env_hwasan_opts=allocator_may_return_null=1 %run %t malloc 2>&1
66
// RUN: %env_hwasan_opts=allocator_may_return_null=0 not %run %t malloc max 2>&1 | FileCheck %s --check-prefix=CHECK-max

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 20 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -323,21 +323,6 @@ struct Frame {
323323
<< " Column: " << Column << "\n"
324324
<< " Inline: " << IsInlineFrame << "\n";
325325
}
326-
327-
// Return a hash value based on the contents of the frame. Here we use a
328-
// cryptographic hash function to minimize the chance of hash collisions. We
329-
// do persist FrameIds as part of memprof formats up to Version 2, inclusive.
330-
// However, the deserializer never calls this function; it uses FrameIds
331-
// merely as keys to look up Frames proper.
332-
inline FrameId hash() const {
333-
llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
334-
HashBuilder;
335-
HashBuilder.add(Function, LineOffset, Column, IsInlineFrame);
336-
llvm::BLAKE3Result<8> Hash = HashBuilder.final();
337-
FrameId Id;
338-
std::memcpy(&Id, Hash.data(), sizeof(Hash));
339-
return Id;
340-
}
341326
};
342327

343328
// A type representing the index into the table of call stacks.
@@ -775,9 +760,6 @@ class CallStackLookupTrait {
775760
}
776761
};
777762

778-
// Compute a CallStackId for a given call stack.
779-
CallStackId hashCallStack(ArrayRef<FrameId> CS);
780-
781763
namespace detail {
782764
// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
783765
// to do so in one of two different ways depending on the type of the hash
@@ -1011,7 +993,7 @@ struct IndexedMemProfData {
1011993
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks;
1012994

1013995
FrameId addFrame(const Frame &F) {
1014-
const FrameId Id = F.hash();
996+
const FrameId Id = hashFrame(F);
1015997
Frames.try_emplace(Id, F);
1016998
return Id;
1017999
}
@@ -1027,6 +1009,25 @@ struct IndexedMemProfData {
10271009
CallStacks.try_emplace(CSId, std::move(CS));
10281010
return CSId;
10291011
}
1012+
1013+
private:
1014+
// Return a hash value based on the contents of the frame. Here we use a
1015+
// cryptographic hash function to minimize the chance of hash collisions. We
1016+
// do persist FrameIds as part of memprof formats up to Version 2, inclusive.
1017+
// However, the deserializer never calls this function; it uses FrameIds
1018+
// merely as keys to look up Frames proper.
1019+
FrameId hashFrame(const Frame &F) const {
1020+
llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
1021+
HashBuilder;
1022+
HashBuilder.add(F.Function, F.LineOffset, F.Column, F.IsInlineFrame);
1023+
llvm::BLAKE3Result<8> Hash = HashBuilder.final();
1024+
FrameId Id;
1025+
std::memcpy(&Id, Hash.data(), sizeof(Hash));
1026+
return Id;
1027+
}
1028+
1029+
// Compute a CallStackId for a given call stack.
1030+
CallStackId hashCallStack(ArrayRef<FrameId> CS) const;
10301031
};
10311032

10321033
struct FrameStat {

llvm/lib/ProfileData/MemProf.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,7 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
290290
return Result;
291291
}
292292

293-
CallStackId hashCallStack(ArrayRef<FrameId> CS) {
293+
CallStackId IndexedMemProfData::hashCallStack(ArrayRef<FrameId> CS) const {
294294
llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
295295
HashBuilder;
296296
for (FrameId F : CS)

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3377,15 +3377,22 @@ def : GCNPat <
33773377

33783378
} // end foreach Ty
33793379

3380+
} // End SubtargetPredicate = HasVOP3PInsts
33803381

33813382
let AddedComplexity = 5 in {
3382-
def : GCNPat <
3383+
class PackB32Pat<Instruction inst> : GCNPat <
33833384
(v2f16 (is_canonicalized_2<build_vector> (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)),
33843385
(f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))),
3385-
(V_PACK_B32_F16_e64 $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
3386+
(inst $src0_mods, VGPR_32:$src0, $src1_mods, VGPR_32:$src1)
33863387
>;
33873388
}
3388-
} // End SubtargetPredicate = HasVOP3PInsts
3389+
let SubtargetPredicate = isGFX9Plus in {
3390+
let True16Predicate = NotHasTrue16BitInsts in
3391+
def : PackB32Pat<V_PACK_B32_F16_e64>;
3392+
3393+
let True16Predicate = UseFakeTrue16Insts in
3394+
def : PackB32Pat<V_PACK_B32_F16_fake16_e64>;
3395+
} // End SubtargetPredicate = isGFX9Plus
33893396

33903397
// With multiple uses of the shift, this will duplicate the shift and
33913398
// increase register pressure.

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -646,7 +646,7 @@ defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32
646646
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
647647
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
648648

649-
defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
649+
defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
650650

651651
let isReMaterializable = 1 in {
652652
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
@@ -1754,7 +1754,7 @@ defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
17541754
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
17551755
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
17561756
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
1757-
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
1757+
defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">;
17581758
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">;
17591759
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">;
17601760
defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -405,16 +405,16 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
405405

406406
let OtherPredicates = [HasDot7Insts] in {
407407
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
408-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
408+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
409409
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
410-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
410+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
411411
} // End OtherPredicates = [HasDot7Insts]
412412

413413
let OtherPredicates = [HasDot1Insts] in {
414414
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
415-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
415+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>;
416416
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
417-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
417+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>;
418418
} // End OtherPredicates = [HasDot1Insts]
419419

420420
def DOT2_BF16_Profile
@@ -433,7 +433,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
433433

434434
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
435435
let IsDOT = 1 in
436-
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
436+
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>,
437437
null_frag, 1>;
438438
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
439439
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.

0 commit comments

Comments
 (0)