Skip to content

Commit 3c6e259

Browse files
pow2clk, Greg Roth, V-FEXrt
authored
Allow native vector intrinsics with fixed component types (#7597)
Several intrinsics that support native vectors return the same element type regardless of overload. For native vectors, these need to return a vector of size matching the overload with the fixed element type. This provides the source generation to allow it and the changes to the related lowering operations to generate the correct DXIL. This includes some queries that return boolean information about the vector components in a boolean vector output and also the unary bit operations which operate on integers and return integers containing information about the component bits. Incidentally moves some intrinsics that have long since not been scalarized out of the scalarized test. They were updated to expect the new output, but were in the wrong location and not tested as thoroughly as their native vector counterparts. Fixes #7342 --------- Co-authored-by: Greg Roth <[email protected]> Co-authored-by: Ashley Coleman <[email protected]>
1 parent 86f5bb5 commit 3c6e259

10 files changed

+345
-74
lines changed

lib/DXIL/DxilOperations.cpp

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -113,32 +113,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
113113
"isSpecialFloat",
114114
Attribute::ReadNone,
115115
1,
116-
{{0x3}},
117-
{{0x0}}}, // Overloads: hf
116+
{{0x403}},
117+
{{0x3}}}, // Overloads: hf<hf
118118
{OC::IsInf,
119119
"IsInf",
120120
OCC::IsSpecialFloat,
121121
"isSpecialFloat",
122122
Attribute::ReadNone,
123123
1,
124-
{{0x3}},
125-
{{0x0}}}, // Overloads: hf
124+
{{0x403}},
125+
{{0x3}}}, // Overloads: hf<hf
126126
{OC::IsFinite,
127127
"IsFinite",
128128
OCC::IsSpecialFloat,
129129
"isSpecialFloat",
130130
Attribute::ReadNone,
131131
1,
132-
{{0x3}},
133-
{{0x0}}}, // Overloads: hf
132+
{{0x403}},
133+
{{0x3}}}, // Overloads: hf<hf
134134
{OC::IsNormal,
135135
"IsNormal",
136136
OCC::IsSpecialFloat,
137137
"isSpecialFloat",
138138
Attribute::ReadNone,
139139
1,
140-
{{0x3}},
141-
{{0x0}}}, // Overloads: hf
140+
{{0x403}},
141+
{{0x3}}}, // Overloads: hf<hf
142142
{OC::Cos,
143143
"Cos",
144144
OCC::Unary,
@@ -301,16 +301,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
301301
"unaryBits",
302302
Attribute::ReadNone,
303303
1,
304-
{{0xe0}},
305-
{{0x0}}}, // Overloads: wil
304+
{{0x4e0}},
305+
{{0xe0}}}, // Overloads: wil<wil
306306
{OC::FirstbitLo,
307307
"FirstbitLo",
308308
OCC::UnaryBits,
309309
"unaryBits",
310310
Attribute::ReadNone,
311311
1,
312-
{{0xe0}},
313-
{{0x0}}}, // Overloads: wil
312+
{{0x4e0}},
313+
{{0xe0}}}, // Overloads: wil<wil
314314

315315
// Unary uint
316316
{OC::FirstbitHi,
@@ -319,8 +319,8 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
319319
"unaryBits",
320320
Attribute::ReadNone,
321321
1,
322-
{{0xe0}},
323-
{{0x0}}}, // Overloads: wil
322+
{{0x4e0}},
323+
{{0xe0}}}, // Overloads: wil<wil
324324

325325
// Unary int
326326
{OC::FirstbitSHi,
@@ -329,8 +329,8 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
329329
"unaryBits",
330330
Attribute::ReadNone,
331331
1,
332-
{{0xe0}},
333-
{{0x0}}}, // Overloads: wil
332+
{{0x4e0}},
333+
{{0xe0}}}, // Overloads: wil<wil
334334

335335
// Binary float
336336
{OC::FMax,
@@ -3787,9 +3787,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
37873787
Type *pPos = GetSamplePosType();
37883788
Type *pV = Type::getVoidTy(m_Ctx);
37893789
Type *pI1 = Type::getInt1Ty(m_Ctx);
3790+
Type *pOlTplI1 = Type::getInt1Ty(m_Ctx);
37903791
Type *pI8 = Type::getInt8Ty(m_Ctx);
37913792
Type *pI16 = Type::getInt16Ty(m_Ctx);
37923793
Type *pI32 = Type::getInt32Ty(m_Ctx);
3794+
Type *pOlTplI32 = Type::getInt32Ty(m_Ctx);
3795+
if (pOverloadType->isVectorTy()) {
3796+
pOlTplI32 =
3797+
VectorType::get(pOlTplI32, pOverloadType->getVectorNumElements());
3798+
pOlTplI1 = VectorType::get(pOlTplI1, pOverloadType->getVectorNumElements());
3799+
}
3800+
37933801
Type *pPI32 = Type::getInt32PtrTy(m_Ctx);
37943802
(void)(pPI32); // Currently unused.
37953803
Type *pI64 = Type::getInt64Ty(m_Ctx);
@@ -3878,22 +3886,22 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
38783886
A(pETy);
38793887
break;
38803888
case OpCode::IsNaN:
3881-
A(pI1);
3889+
A(pOlTplI1);
38823890
A(pI32);
38833891
A(pETy);
38843892
break;
38853893
case OpCode::IsInf:
3886-
A(pI1);
3894+
A(pOlTplI1);
38873895
A(pI32);
38883896
A(pETy);
38893897
break;
38903898
case OpCode::IsFinite:
3891-
A(pI1);
3899+
A(pOlTplI1);
38923900
A(pI32);
38933901
A(pETy);
38943902
break;
38953903
case OpCode::IsNormal:
3896-
A(pI1);
3904+
A(pOlTplI1);
38973905
A(pI32);
38983906
A(pETy);
38993907
break;
@@ -3997,26 +4005,26 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
39974005
A(pETy);
39984006
break;
39994007
case OpCode::Countbits:
4000-
A(pI32);
4008+
A(pOlTplI32);
40014009
A(pI32);
40024010
A(pETy);
40034011
break;
40044012
case OpCode::FirstbitLo:
4005-
A(pI32);
4013+
A(pOlTplI32);
40064014
A(pI32);
40074015
A(pETy);
40084016
break;
40094017

40104018
// Unary uint
40114019
case OpCode::FirstbitHi:
4012-
A(pI32);
4020+
A(pOlTplI32);
40134021
A(pI32);
40144022
A(pETy);
40154023
break;
40164024

40174025
// Unary int
40184026
case OpCode::FirstbitSHi:
4019-
A(pI32);
4027+
A(pOlTplI32);
40204028
A(pI32);
40214029
A(pETy);
40224030
break;
@@ -4661,7 +4669,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
46614669
A(pI1);
46624670
break;
46634671
case OpCode::WaveActiveAllEqual:
4664-
A(pI1);
4672+
A(pOlTplI1);
46654673
A(pI32);
46664674
A(pETy);
46674675
break;
@@ -5397,7 +5405,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
53975405

53985406
// Quad Wave Ops
53995407
case OpCode::QuadVote:
5400-
A(pI1);
5408+
A(pOlTplI1);
54015409
A(pI32);
54025410
A(pI1);
54035411
A(pI8);

lib/HLSL/HLOperationLower.cpp

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,42 +2083,58 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
20832083
HLOperationLowerHelper &helper,
20842084
HLObjectOperationLowerHelper *pObjHelper,
20852085
bool &Translated) {
2086-
Value *firstbitHi =
2087-
TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated);
2088-
// firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
2086+
hlsl::OP *OP = &helper.hlslOP;
20892087
IRBuilder<> Builder(CI);
2090-
Constant *neg1 = Builder.getInt32(-1);
2091-
Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2088+
Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2089+
2090+
Type *Ty = Src->getType();
2091+
Type *RetTy = Type::getInt32Ty(CI->getContext());
2092+
unsigned NumElements = 0;
2093+
if (Ty->isVectorTy()) {
2094+
NumElements = Ty->getVectorNumElements();
2095+
RetTy = VectorType::get(RetTy, NumElements);
2096+
}
2097+
2098+
Constant *OpArg = OP->GetU32Const((unsigned)opcode);
2099+
Value *Args[] = {OpArg, Src};
2100+
2101+
Value *FirstbitHi =
2102+
TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder);
20922103

2093-
Type *Ty = src->getType();
20942104
IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
2095-
Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth() - 1);
2105+
Constant *Neg1 = Builder.getInt32(-1);
2106+
Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1);
20962107

2097-
if (Ty == Ty->getScalarType()) {
2098-
Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
2099-
Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
2100-
return Builder.CreateSelect(cond, neg1, sub);
2101-
} else {
2102-
Value *result = UndefValue::get(CI->getType());
2103-
unsigned vecSize = Ty->getVectorNumElements();
2104-
for (unsigned i = 0; i < vecSize; i++) {
2105-
Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
2106-
Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
2107-
Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
2108-
Value *Elt = Builder.CreateSelect(cond, neg1, sub);
2109-
result = Builder.CreateInsertElement(result, Elt, i);
2110-
}
2111-
return result;
2108+
if (NumElements > 0) {
2109+
Neg1 = ConstantVector::getSplat(NumElements, Neg1);
2110+
BitWidth = ConstantVector::getSplat(NumElements, BitWidth);
21122111
}
2112+
2113+
Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi);
2114+
Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi);
2115+
return Builder.CreateSelect(Cond, Neg1, Sub);
21132116
}
21142117

21152118
Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
21162119
HLOperationLowerHelper &helper,
21172120
HLObjectOperationLowerHelper *pObjHelper,
21182121
bool &Translated) {
2119-
Value *firstbitLo =
2120-
TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated);
2121-
return firstbitLo;
2122+
hlsl::OP *OP = &helper.hlslOP;
2123+
IRBuilder<> Builder(CI);
2124+
Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
2125+
2126+
Type *Ty = Src->getType();
2127+
Type *RetTy = Type::getInt32Ty(CI->getContext());
2128+
if (Ty->isVectorTy())
2129+
RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
2130+
2131+
Constant *OpArg = OP->GetU32Const((unsigned)opcode);
2132+
Value *Args[] = {OpArg, Src};
2133+
2134+
Value *FirstbitLo =
2135+
TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder);
2136+
2137+
return FirstbitLo;
21222138
}
21232139

21242140
Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,

tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,82 @@ void main() {
216216
// CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp2]]) ; Exp(value)
217217
fRes += pow(fVec2, fVec1);
218218

219+
// CHECK-NOT: extractelement
220+
// CHECK-NOT: insertelement
221+
// CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[hvec3]], [[hvec2]]
222+
// CHECK: [[atan:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 17, <[[NUM]] x half> [[div]]) ; Atan(value)
223+
// CHECK: [[add:%.*]] = fadd fast <[[NUM]] x half> [[atan]], <half 0x
224+
// CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x half> [[atan]], <half 0x
225+
// CHECK: [[xlt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], zeroinitializer
226+
// CHECK: [[xeq:%.*]] = fcmp fast oeq <[[NUM]] x half> [[hvec2]], zeroinitializer
227+
// CHECK: [[yge:%.*]] = fcmp fast oge <[[NUM]] x half> [[hvec3]], zeroinitializer
228+
// CHECK: [[ylt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec3]], zeroinitializer
229+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xlt]]
230+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> [[add]], <[[NUM]] x half>
231+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xlt]]
232+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> [[sub]], <[[NUM]] x half>
233+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xeq]]
234+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> <half 0x
235+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xeq]]
236+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> <half 0x
237+
hRes += atan2(hVec3, hVec2);
238+
239+
// CHECK-NOT: extractelement
240+
// CHECK-NOT: insertelement
241+
// CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[fvec3]], [[fvec2]]
242+
// CHECK: [[atan:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 17, <[[NUM]] x float> [[div]]) ; Atan(value)
243+
// CHECK: [[add:%.*]] = fadd fast <[[NUM]] x float> [[atan]], <float 0x
244+
// CHECK: [[sub:%.*]] = fadd fast <[[NUM]] x float> [[atan]], <float 0x
245+
// CHECK: [[xlt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec2]], zeroinitializer
246+
// CHECK: [[xeq:%.*]] = fcmp fast oeq <[[NUM]] x float> [[fvec2]], zeroinitializer
247+
// CHECK: [[yge:%.*]] = fcmp fast oge <[[NUM]] x float> [[fvec3]], zeroinitializer
248+
// CHECK: [[ylt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec3]], zeroinitializer
249+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xlt]]
250+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> [[add]], <[[NUM]] x float>
251+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xlt]]
252+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> [[sub]], <[[NUM]] x float>
253+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xeq]]
254+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> <float 0x
255+
// CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xeq]]
256+
// CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> <float 0x
257+
fRes += atan2(fVec3, fVec2);
258+
259+
// CHECK-NOT: extractelement
260+
// CHECK-NOT: insertelement
261+
// CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[hvec2]], [[hvec3]]
262+
// CHECK: [[ndiv:%.*]] = fsub fast <[[NUM]] x half> {{.*}}, [[div]]
263+
// CHECK: [[cmp:%.*]] = fcmp fast oge <[[NUM]] x half> [[div]], [[ndiv]]
264+
// CHECK: [[abs:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 6, <[[NUM]] x half> [[div]]) ; FAbs(value)
265+
// CHECK: [[frc:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 22, <[[NUM]] x half> [[abs]]) ; Frc(value)
266+
// CHECK: [[nfrc:%.*]] = fsub fast <[[NUM]] x half> {{.*}}, [[frc]]
267+
// CHECK: [[rfrc:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x half> [[frc]], <[[NUM]] x half> [[nfrc]]
268+
// CHECK: fmul fast <[[NUM]] x half> [[rfrc]], [[hvec3]]
269+
hRes += fmod(hVec2, hVec3);
270+
271+
// CHECK-NOT: extractelement
272+
// CHECK-NOT: insertelement
273+
// CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[fvec2]], [[fvec3]]
274+
// CHECK: [[ndiv:%.*]] = fsub fast <[[NUM]] x float> {{.*}}, [[div]]
275+
// CHECK: [[cmp:%.*]] = fcmp fast oge <[[NUM]] x float> [[div]], [[ndiv]]
276+
// CHECK: [[abs:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 6, <[[NUM]] x float> [[div]]) ; FAbs(value)
277+
// CHECK: [[frc:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 22, <[[NUM]] x float> [[abs]]) ; Frc(value)
278+
// CHECK: [[nfrc:%.*]] = fsub fast <[[NUM]] x float> {{.*}}, [[frc]]
279+
// CHECK: [[rfrc:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[frc]], <[[NUM]] x float> [[nfrc]]
280+
// CHECK: fmul fast <[[NUM]] x float> [[rfrc]], [[fvec3]]
281+
fRes += fmod(fVec2, fVec3);
282+
283+
// CHECK-NOT: extractelement
284+
// CHECK-NOT: insertelement
285+
// CHECK: [[exp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[hvec2]]) ; Exp(value)
286+
// CHECK: fmul fast <[[NUM]] x half> [[exp]], [[hvec1]]
287+
hRes += ldexp(hVec1, hVec2);
288+
289+
// CHECK-NOT: extractelement
290+
// CHECK-NOT: insertelement
291+
// CHECK: [[exp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[fvec2]]) ; Exp(value)
292+
// CHECK: fmul fast <[[NUM]] x float> [[exp]], [[fvec1]]
293+
fRes += ldexp(fVec1, fVec2);
294+
219295
vector<half, NUM> hVal;
220296
// CHECK-NOT: extractelement
221297
// CHECK-NOT: insertelement
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=isnan -DOP=8 -DNUM=39 %s | FileCheck %s
2+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=isinf -DOP=9 -DNUM=38 %s | FileCheck %s
3+
// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=isfinite -DOP=10 -DNUM=37 %s | FileCheck %s
4+
5+
// Test vector-enabled isspecial unary intrinsics that take float-like parameters and
6+
// and are "trivial" in that they can be implemented with a single call.
7+
// These return boolean vectors of the same size as their paraemter.
8+
9+
RWByteAddressBuffer buf;
10+
11+
// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half>
12+
// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float>
13+
14+
[numthreads(8,1,1)]
15+
void main() {
16+
17+
// Capture opcode number.
18+
// CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
19+
// CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]]
20+
buf.Store(999, OP);
21+
22+
// CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
23+
24+
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0
25+
// CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0
26+
vector<float16_t, NUM> hVec = buf.Load<vector<float16_t, NUM> >(0);
27+
28+
// Convergent markers prevent GVN removal of redundant annotateHandle calls.
29+
// CONV: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
30+
31+
// CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024
32+
// CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0
33+
vector<float, NUM> fVec = buf.Load<vector<float, NUM> >(1024);
34+
35+
// CHECK-NOT: extractelement
36+
// CHECK-NOT: insertelement
37+
// NOTE: This behavior will change with #7588
38+
// CHECK: [[tmp:%.*]] = fpext <[[NUM]] x half> [[hvec]] to <[[NUM]] x float>
39+
// CHECK: call <[[NUM]] x i1> @dx.op.isSpecialFloat.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[tmp]])
40+
vector<bool, NUM> hRes = FUNC(hVec);
41+
42+
// CHECK-NOT: extractelement
43+
// CHECK-NOT: insertelement
44+
// CHECK: call <[[NUM]] x i1> @dx.op.isSpecialFloat.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]])
45+
vector<bool, NUM> fRes = FUNC(fVec);
46+
47+
// CHECK-NOT: extractelement
48+
// CHECK-NOT: insertelement
49+
buf.Store<vector<bool, NUM> >(0, hRes);
50+
buf.Store<vector<bool, NUM> >(1024, fRes);
51+
}

0 commit comments

Comments
 (0)