Skip to content

Commit 7236893

Browse files
committed
add a positive test
Created using spr 1.3.5-bogner
2 parents 4285cd4 + 5aa1275 commit 7236893

File tree

63 files changed

+1973
-349
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+1973
-349
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,10 @@ X86 Support
628628
* Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
629629
``*_(mask(z)))_minmax_s[s|d|h]``.
630630

631+
- Supported intrinsics for ``SM4 and AVX10.2``.
632+
* Supported SM4 intrinsics of ``_mm512_sm4key4_epi32`` and
633+
``_mm512_sm4rnds4_epi32``.
634+
631635
- All intrinsics in adcintrin.h can now be used in constant expressions.
632636

633637
- All intrinsics in adxintrin.h can now be used in constant expressions.

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
21792179
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
21802180
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
21812181

2182+
// SM4_EVEX
2183+
TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
2184+
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
2185+
21822186
// AVX10 MINMAX
21832187
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
21842188
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20492,8 +20492,8 @@ static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
2049220492
#undef MMA_VARIANTS_B1_XOR
2049320493
}
2049420494

20495-
static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
20496-
const CallExpr *E) {
20495+
static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
20496+
const CallExpr *E) {
2049720497
Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
2049820498
QualType ArgType = E->getArg(0)->getType();
2049920499
clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
@@ -20503,6 +20503,21 @@ static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
2050320503
{Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
2050420504
}
2050520505

20506+
static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
20507+
Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20508+
QualType ArgType = E->getArg(0)->getType();
20509+
clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
20510+
llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
20511+
20512+
// Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
20513+
auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
20514+
auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
20515+
MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
20516+
LD->setMetadata(LLVMContext::MD_invariant_load, MD);
20517+
20518+
return LD;
20519+
}
20520+
2050620521
static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
2050720522
const CallExpr *E) {
2050820523
Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
@@ -20536,9 +20551,11 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
2053620551
return nullptr;
2053720552
}
2053820553

20539-
if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
20540-
IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20541-
return MakeLdgLdu(IntrinsicID, CGF, E);
20554+
if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
20555+
return MakeLdg(CGF, E);
20556+
20557+
if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20558+
return MakeLdu(IntrinsicID, CGF, E);
2054220559

2054320560
SmallVector<Value *, 16> Args;
2054420561
auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
@@ -20675,16 +20692,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
2067520692
case NVPTX::BI__nvvm_ldg_ul2:
2067620693
case NVPTX::BI__nvvm_ldg_ull:
2067720694
case NVPTX::BI__nvvm_ldg_ull2:
20678-
// PTX Interoperability section 2.2: "For a vector with an even number of
20679-
// elements, its alignment is set to number of elements times the alignment
20680-
// of its member: n*alignof(t)."
20681-
return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
2068220695
case NVPTX::BI__nvvm_ldg_f:
2068320696
case NVPTX::BI__nvvm_ldg_f2:
2068420697
case NVPTX::BI__nvvm_ldg_f4:
2068520698
case NVPTX::BI__nvvm_ldg_d:
2068620699
case NVPTX::BI__nvvm_ldg_d2:
20687-
return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20700+
// PTX Interoperability section 2.2: "For a vector with an even number of
20701+
// elements, its alignment is set to number of elements times the alignment
20702+
// of its member: n*alignof(t)."
20703+
return MakeLdg(*this, E);
2068820704

2068920705
case NVPTX::BI__nvvm_ldu_c:
2069020706
case NVPTX::BI__nvvm_ldu_sc:
@@ -20715,13 +20731,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
2071520731
case NVPTX::BI__nvvm_ldu_ul2:
2071620732
case NVPTX::BI__nvvm_ldu_ull:
2071720733
case NVPTX::BI__nvvm_ldu_ull2:
20718-
return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20734+
return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
2071920735
case NVPTX::BI__nvvm_ldu_f:
2072020736
case NVPTX::BI__nvvm_ldu_f2:
2072120737
case NVPTX::BI__nvvm_ldu_f4:
2072220738
case NVPTX::BI__nvvm_ldu_d:
2072320739
case NVPTX::BI__nvvm_ldu_d2:
20724-
return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20740+
return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
2072520741

2072620742
case NVPTX::BI__nvvm_atom_cta_add_gen_i:
2072720743
case NVPTX::BI__nvvm_atom_cta_add_gen_l:
@@ -21195,14 +21211,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
2119521211
return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
2119621212
*this);
2119721213
case NVPTX::BI__nvvm_ldg_h:
21198-
return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
2119921214
case NVPTX::BI__nvvm_ldg_h2:
21200-
return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
21215+
return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
2120121216
case NVPTX::BI__nvvm_ldu_h:
21217+
case NVPTX::BI__nvvm_ldu_h2:
2120221218
return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21203-
case NVPTX::BI__nvvm_ldu_h2: {
21204-
return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21205-
}
2120621219
case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
2120721220
return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
2120821221
Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ set(x86_files
243243
shaintrin.h
244244
sm3intrin.h
245245
sm4intrin.h
246+
sm4evexintrin.h
246247
smmintrin.h
247248
tbmintrin.h
248249
tmmintrin.h

clang/lib/Headers/immintrin.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,11 @@ _storebe_i64(void * __P, long long __D) {
677677
#include <avx10_2_512satcvtintrin.h>
678678
#endif
679679

680+
#if !defined(__SCE__) || __has_feature(modules) || \
681+
(defined(__AVX10_2_512__) && defined(__SM4__))
682+
#include <sm4evexintrin.h>
683+
#endif
684+
680685
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
681686
#include <enqcmdintrin.h>
682687
#endif

clang/lib/Headers/sm4evexintrin.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*===----------------------------------------------------------------------===
8+
*/
9+
#ifndef __IMMINTRIN_H
10+
#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
11+
#endif // __IMMINTRIN_H
12+
13+
#ifndef __SM4EVEXINTRIN_H
14+
#define __SM4EVEXINTRIN_H
15+
16+
#define __DEFAULT_FN_ATTRS512 \
17+
__attribute__((__always_inline__, __nodebug__, \
18+
__target__("sm4,avx10.2-512"), __min_vector_width__(512)))
19+
20+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
21+
_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
22+
return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
23+
}
24+
25+
static __inline__ __m512i __DEFAULT_FN_ATTRS512
26+
_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
27+
return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
28+
}
29+
30+
#undef __DEFAULT_FN_ATTRS512
31+
32+
#endif // __SM4EVEXINTRIN_H
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
2+
// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
3+
// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
4+
// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
5+
6+
#include <immintrin.h>
7+
#include <stddef.h>
8+
9+
__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
10+
// CHECK-LABEL: @test_mm512_sm4key4_epi32(
11+
// CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
12+
return _mm512_sm4key4_epi32(__A, __B);
13+
}
14+
15+
__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
16+
// CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
17+
// CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
18+
return _mm512_sm4rnds4_epi32(__A, __B);
19+
}

clang/test/CodeGen/builtins-nvptx-native-half-type-native.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2)));
5252
// CHECK: call <2 x half> @llvm.nvvm.fmax.ftz.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}})
5353
// CHECK: call <2 x half> @llvm.nvvm.fmax.nan.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}})
5454
// CHECK: call <2 x half> @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}})
55-
// CHECK: call half @llvm.nvvm.ldg.global.f.f16.p0(ptr {{.*}}, i32 2)
56-
// CHECK: call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p0(ptr {{.*}}, i32 4)
55+
// CHECK: load half, ptr addrspace(1) {{.*}}, align 2, !invariant.load
56+
// CHECK: load <2 x half>, ptr addrspace(1) {{.*}}, align 4, !invariant.load
5757
// CHECK: call half @llvm.nvvm.ldu.global.f.f16.p0(ptr {{.*}}, i32 2)
5858
// CHECK: call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p0(ptr {{.*}}, i32 4)
5959
__device__ void nvvm_native_half_types(void *a, void*b, void*c, __fp16* out) {

clang/test/CodeGen/builtins-nvptx-native-half-type.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,9 @@ typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2)));
177177

178178
// CHECK-LABEL: nvvm_ldg_native_half_types
179179
__device__ void nvvm_ldg_native_half_types(const void *p) {
180-
// CHECK: call half @llvm.nvvm.ldg.global.f.f16.p0
180+
// CHECK: load half, ptr addrspace(1) {{.*}}, align 2, !invariant.load
181181
__nvvm_ldg_h((const __fp16 *)p);
182-
// CHECK: call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p0
182+
// CHECK: load <2 x half>, ptr addrspace(1) {{.*}}, align 4, !invariant.load
183183
__nvvm_ldg_h2((const __fp16v2 *)p);
184184
}
185185

clang/test/CodeGen/builtins-nvptx.c

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -598,33 +598,33 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df,
598598

599599
// CHECK-LABEL: nvvm_ldg
600600
__device__ void nvvm_ldg(const void *p) {
601-
// CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
602-
// CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
603-
// CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
601+
// CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load
602+
// CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load
603+
// CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load
604604
__nvvm_ldg_c((const char *)p);
605605
__nvvm_ldg_uc((const unsigned char *)p);
606606
__nvvm_ldg_sc((const signed char *)p);
607607

608-
// CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2)
609-
// CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2)
608+
// CHECK: load i16, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
609+
// CHECK: load i16, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
610610
__nvvm_ldg_s((const short *)p);
611611
__nvvm_ldg_us((const unsigned short *)p);
612612

613-
// CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
614-
// CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
613+
// CHECK: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
614+
// CHECK: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
615615
__nvvm_ldg_i((const int *)p);
616616
__nvvm_ldg_ui((const unsigned int *)p);
617617

618-
// LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
619-
// LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
620-
// LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8)
621-
// LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8)
618+
// LP32: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
619+
// LP32: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
620+
// LP64: load i64, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
621+
// LP64: load i64, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
622622
__nvvm_ldg_l((const long *)p);
623623
__nvvm_ldg_ul((const unsigned long *)p);
624624

625-
// CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0(ptr {{%[0-9]+}}, i32 4)
625+
// CHECK: load float, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
626626
__nvvm_ldg_f((const float *)p);
627-
// CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0(ptr {{%[0-9]+}}, i32 8)
627+
// CHECK: load double, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
628628
__nvvm_ldg_d((const double *)p);
629629

630630
// In practice, the pointers we pass to __ldg will be aligned as appropriate
@@ -636,79 +636,79 @@ __device__ void nvvm_ldg(const void *p) {
636636
// elements, its alignment is set to number of elements times the alignment of
637637
// its member: n*alignof(t)."
638638

639-
// CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2)
640-
// CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2)
641-
// CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2)
639+
// CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
640+
// CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
641+
// CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
642642
typedef char char2 __attribute__((ext_vector_type(2)));
643643
typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
644644
typedef signed char schar2 __attribute__((ext_vector_type(2)));
645645
__nvvm_ldg_c2((const char2 *)p);
646646
__nvvm_ldg_uc2((const uchar2 *)p);
647647
__nvvm_ldg_sc2((const schar2 *)p);
648648

649-
// CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4)
650-
// CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4)
651-
// CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4)
649+
// CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
650+
// CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
651+
// CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
652652
typedef char char4 __attribute__((ext_vector_type(4)));
653653
typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
654654
typedef signed char schar4 __attribute__((ext_vector_type(4)));
655655
__nvvm_ldg_c4((const char4 *)p);
656656
__nvvm_ldg_uc4((const uchar4 *)p);
657657
__nvvm_ldg_sc4((const schar4 *)p);
658658

659-
// CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4)
660-
// CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4)
659+
// CHECK: load <2 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
660+
// CHECK: load <2 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
661661
typedef short short2 __attribute__((ext_vector_type(2)));
662662
typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
663663
__nvvm_ldg_s2((const short2 *)p);
664664
__nvvm_ldg_us2((const ushort2 *)p);
665665

666-
// CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0(ptr {{%[0-9]+}}, i32 8)
667-
// CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0(ptr {{%[0-9]+}}, i32 8)
666+
// CHECK: load <4 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
667+
// CHECK: load <4 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
668668
typedef short short4 __attribute__((ext_vector_type(4)));
669669
typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
670670
__nvvm_ldg_s4((const short4 *)p);
671671
__nvvm_ldg_us4((const ushort4 *)p);
672672

673-
// CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
674-
// CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
673+
// CHECK: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
674+
// CHECK: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
675675
typedef int int2 __attribute__((ext_vector_type(2)));
676676
typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
677677
__nvvm_ldg_i2((const int2 *)p);
678678
__nvvm_ldg_ui2((const uint2 *)p);
679679

680-
// CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0(ptr {{%[0-9]+}}, i32 16)
681-
// CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0(ptr {{%[0-9]+}}, i32 16)
680+
// CHECK: load <4 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
681+
// CHECK: load <4 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
682682
typedef int int4 __attribute__((ext_vector_type(4)));
683683
typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
684684
__nvvm_ldg_i4((const int4 *)p);
685685
__nvvm_ldg_ui4((const uint4 *)p);
686686

687-
// LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
688-
// LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
689-
// LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
690-
// LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
687+
// LP32: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
688+
// LP32: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
689+
// LP64: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
690+
// LP64: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
691691
typedef long long2 __attribute__((ext_vector_type(2)));
692692
typedef unsigned long ulong2 __attribute__((ext_vector_type(2)));
693693
__nvvm_ldg_l2((const long2 *)p);
694694
__nvvm_ldg_ul2((const ulong2 *)p);
695695

696-
// CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
697-
// CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
696+
// CHECK: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
697+
// CHECK: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
698698
typedef long long longlong2 __attribute__((ext_vector_type(2)));
699699
typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2)));
700700
__nvvm_ldg_ll2((const longlong2 *)p);
701701
__nvvm_ldg_ull2((const ulonglong2 *)p);
702702

703-
// CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0(ptr {{%[0-9]+}}, i32 8)
703+
// CHECK: load <2 x float>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
704704
typedef float float2 __attribute__((ext_vector_type(2)));
705705
__nvvm_ldg_f2((const float2 *)p);
706706

707-
// CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0(ptr {{%[0-9]+}}, i32 16)
707+
// CHECK: load <4 x float>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
708708
typedef float float4 __attribute__((ext_vector_type(4)));
709709
__nvvm_ldg_f4((const float4 *)p);
710710

711-
// CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0(ptr {{%[0-9]+}}, i32 16)
711+
// CHECK: load <2 x double>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
712712
typedef double double2 __attribute__((ext_vector_type(2)));
713713
__nvvm_ldg_d2((const double2 *)p);
714714
}

0 commit comments

Comments
 (0)