microsoft
diff --git a/‎lib/DXIL/DxilOperations.cpp
Lines changed: 34 additions & 26 deletions b/‎lib/DXIL/DxilOperations.cpp
Lines changed: 34 additions & 26 deletions
diff --git a/‎lib/HLSL/HLOperationLower.cpp
Lines changed: 41 additions & 25 deletions b/‎lib/HLSL/HLOperationLower.cpp
Lines changed: 41 additions & 25 deletions
diff --git a/‎tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl
Lines changed: 76 additions & 0 deletions b/‎tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl
Lines changed: 76 additions & 0 deletions
diff --git a/‎tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-isspecial-intrinsics.hlsl
Lines changed: 51 additions & 0 deletions b/‎tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-isspecial-intrinsics.hlsl
Lines changed: 51 additions & 0 deletions
@@ -113,32 +113,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
      "isSpecialFloat",
      Attribute::ReadNone,
      1,
-     {{0x3}},
-     {{0x0}}}, // Overloads: hf
+     {{0x403}},
+     {{0x3}}}, // Overloads: hf<hf
     {OC::IsInf,
      "IsInf",
      OCC::IsSpecialFloat,
      "isSpecialFloat",
      Attribute::ReadNone,
      1,
-     {{0x3}},
-     {{0x0}}}, // Overloads: hf
+     {{0x403}},
+     {{0x3}}}, // Overloads: hf<hf
     {OC::IsFinite,
      "IsFinite",
      OCC::IsSpecialFloat,
      "isSpecialFloat",
      Attribute::ReadNone,
      1,
-     {{0x3}},
-     {{0x0}}}, // Overloads: hf
+     {{0x403}},
+     {{0x3}}}, // Overloads: hf<hf
     {OC::IsNormal,
      "IsNormal",
      OCC::IsSpecialFloat,
      "isSpecialFloat",
      Attribute::ReadNone,
      1,
-     {{0x3}},
-     {{0x0}}}, // Overloads: hf
+     {{0x403}},
+     {{0x3}}}, // Overloads: hf<hf
     {OC::Cos,
      "Cos",
      OCC::Unary,
@@ -301,16 +301,16 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
      "unaryBits",
      Attribute::ReadNone,
      1,
-     {{0xe0}},
-     {{0x0}}}, // Overloads: wil
+     {{0x4e0}},
+     {{0xe0}}}, // Overloads: wil<wil
     {OC::FirstbitLo,
      "FirstbitLo",
      OCC::UnaryBits,
      "unaryBits",
      Attribute::ReadNone,
      1,
-     {{0xe0}},
-     {{0x0}}}, // Overloads: wil
+     {{0x4e0}},
+     {{0xe0}}}, // Overloads: wil<wil
 
     // Unary uint
     {OC::FirstbitHi,
@@ -319,8 +319,8 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
      "unaryBits",
      Attribute::ReadNone,
      1,
-     {{0xe0}},
-     {{0x0}}}, // Overloads: wil
+     {{0x4e0}},
+     {{0xe0}}}, // Overloads: wil<wil
 
     // Unary int
     {OC::FirstbitSHi,
@@ -329,8 +329,8 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
      "unaryBits",
      Attribute::ReadNone,
      1,
-     {{0xe0}},
-     {{0x0}}}, // Overloads: wil
+     {{0x4e0}},
+     {{0xe0}}}, // Overloads: wil<wil
 
     // Binary float
     {OC::FMax,
@@ -3787,9 +3787,17 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
   Type *pPos = GetSamplePosType();
   Type *pV = Type::getVoidTy(m_Ctx);
   Type *pI1 = Type::getInt1Ty(m_Ctx);
+  Type *pOlTplI1 = Type::getInt1Ty(m_Ctx);
   Type *pI8 = Type::getInt8Ty(m_Ctx);
   Type *pI16 = Type::getInt16Ty(m_Ctx);
   Type *pI32 = Type::getInt32Ty(m_Ctx);
+  Type *pOlTplI32 = Type::getInt32Ty(m_Ctx);
+  if (pOverloadType->isVectorTy()) {
+    pOlTplI32 =
+        VectorType::get(pOlTplI32, pOverloadType->getVectorNumElements());
+    pOlTplI1 = VectorType::get(pOlTplI1, pOverloadType->getVectorNumElements());
+  }
+
   Type *pPI32 = Type::getInt32PtrTy(m_Ctx);
   (void)(pPI32); // Currently unused.
   Type *pI64 = Type::getInt64Ty(m_Ctx);
@@ -3878,22 +3886,22 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
     A(pETy);
     break;
   case OpCode::IsNaN:
-    A(pI1);
+    A(pOlTplI1);
     A(pI32);
     A(pETy);
     break;
   case OpCode::IsInf:
-    A(pI1);
+    A(pOlTplI1);
     A(pI32);
     A(pETy);
     break;
   case OpCode::IsFinite:
-    A(pI1);
+    A(pOlTplI1);
     A(pI32);
     A(pETy);
     break;
   case OpCode::IsNormal:
-    A(pI1);
+    A(pOlTplI1);
     A(pI32);
     A(pETy);
     break;
@@ -3997,26 +4005,26 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
     A(pETy);
     break;
   case OpCode::Countbits:
-    A(pI32);
+    A(pOlTplI32);
     A(pI32);
     A(pETy);
     break;
   case OpCode::FirstbitLo:
-    A(pI32);
+    A(pOlTplI32);
     A(pI32);
     A(pETy);
     break;
 
     // Unary uint
   case OpCode::FirstbitHi:
-    A(pI32);
+    A(pOlTplI32);
     A(pI32);
     A(pETy);
     break;
 
     // Unary int
   case OpCode::FirstbitSHi:
-    A(pI32);
+    A(pOlTplI32);
     A(pI32);
     A(pETy);
     break;
@@ -4661,7 +4669,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
     A(pI1);
     break;
   case OpCode::WaveActiveAllEqual:
-    A(pI1);
+    A(pOlTplI1);
     A(pI32);
     A(pETy);
     break;
@@ -5397,7 +5405,7 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
 
     // Quad Wave Ops
   case OpCode::QuadVote:
-    A(pI1);
+    A(pOlTplI1);
     A(pI32);
     A(pI1);
     A(pI8);
 
@@ -2083,42 +2083,58 @@ Value *TranslateFirstbitHi(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                            HLOperationLowerHelper &helper,
                            HLObjectOperationLowerHelper *pObjHelper,
                            bool &Translated) {
-  Value *firstbitHi =
-      TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated);
-  // firstbitHi == -1? -1 : (bitWidth-1 -firstbitHi);
+  hlsl::OP *OP = &helper.hlslOP;
   IRBuilder<> Builder(CI);
-  Constant *neg1 = Builder.getInt32(-1);
-  Value *src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
+  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
+
+  Type *Ty = Src->getType();
+  Type *RetTy = Type::getInt32Ty(CI->getContext());
+  unsigned NumElements = 0;
+  if (Ty->isVectorTy()) {
+    NumElements = Ty->getVectorNumElements();
+    RetTy = VectorType::get(RetTy, NumElements);
+  }
+
+  Constant *OpArg = OP->GetU32Const((unsigned)opcode);
+  Value *Args[] = {OpArg, Src};
+
+  Value *FirstbitHi =
+      TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder);
 
-  Type *Ty = src->getType();
   IntegerType *EltTy = cast<IntegerType>(Ty->getScalarType());
-  Constant *bitWidth = Builder.getInt32(EltTy->getBitWidth() - 1);
+  Constant *Neg1 = Builder.getInt32(-1);
+  Constant *BitWidth = Builder.getInt32(EltTy->getBitWidth() - 1);
 
-  if (Ty == Ty->getScalarType()) {
-    Value *sub = Builder.CreateSub(bitWidth, firstbitHi);
-    Value *cond = Builder.CreateICmpEQ(neg1, firstbitHi);
-    return Builder.CreateSelect(cond, neg1, sub);
-  } else {
-    Value *result = UndefValue::get(CI->getType());
-    unsigned vecSize = Ty->getVectorNumElements();
-    for (unsigned i = 0; i < vecSize; i++) {
-      Value *EltFirstBit = Builder.CreateExtractElement(firstbitHi, i);
-      Value *sub = Builder.CreateSub(bitWidth, EltFirstBit);
-      Value *cond = Builder.CreateICmpEQ(neg1, EltFirstBit);
-      Value *Elt = Builder.CreateSelect(cond, neg1, sub);
-      result = Builder.CreateInsertElement(result, Elt, i);
-    }
-    return result;
+  if (NumElements > 0) {
+    Neg1 = ConstantVector::getSplat(NumElements, Neg1);
+    BitWidth = ConstantVector::getSplat(NumElements, BitWidth);
   }
+
+  Value *Sub = Builder.CreateSub(BitWidth, FirstbitHi);
+  Value *Cond = Builder.CreateICmpEQ(Neg1, FirstbitHi);
+  return Builder.CreateSelect(Cond, Neg1, Sub);
 }
 
 Value *TranslateFirstbitLo(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                            HLOperationLowerHelper &helper,
                            HLObjectOperationLowerHelper *pObjHelper,
                            bool &Translated) {
-  Value *firstbitLo =
-      TrivialUnaryOperationRet(CI, IOP, opcode, helper, pObjHelper, Translated);
-  return firstbitLo;
+  hlsl::OP *OP = &helper.hlslOP;
+  IRBuilder<> Builder(CI);
+  Value *Src = CI->getArgOperand(HLOperandIndex::kUnaryOpSrc0Idx);
+
+  Type *Ty = Src->getType();
+  Type *RetTy = Type::getInt32Ty(CI->getContext());
+  if (Ty->isVectorTy())
+    RetTy = VectorType::get(RetTy, Ty->getVectorNumElements());
+
+  Constant *OpArg = OP->GetU32Const((unsigned)opcode);
+  Value *Args[] = {OpArg, Src};
+
+  Value *FirstbitLo =
+      TrivialDxilOperation(opcode, Args, Ty, RetTy, OP, Builder);
+
+  return FirstbitLo;
 }
 
 Value *TranslateLit(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
 
@@ -216,6 +216,82 @@ void main() {
   // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp2]])  ; Exp(value)
   fRes += pow(fVec2, fVec1);
 
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[hvec3]], [[hvec2]]
+  // CHECK: [[atan:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 17, <[[NUM]] x half> [[div]]) ; Atan(value)
+  // CHECK: [[add:%.*]] = fadd fast <[[NUM]] x half> [[atan]], <half 0x
+  // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x half> [[atan]], <half 0x
+  // CHECK: [[xlt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec2]], zeroinitializer
+  // CHECK: [[xeq:%.*]] = fcmp fast oeq <[[NUM]] x half> [[hvec2]], zeroinitializer
+  // CHECK: [[yge:%.*]] = fcmp fast oge <[[NUM]] x half> [[hvec3]], zeroinitializer
+  // CHECK: [[ylt:%.*]] = fcmp fast olt <[[NUM]] x half> [[hvec3]], zeroinitializer
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xlt]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> [[add]], <[[NUM]] x half>
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xlt]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> [[sub]], <[[NUM]] x half>
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xeq]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> <half 0x
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xeq]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x half> <half 0x
+  hRes += atan2(hVec3, hVec2);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[fvec3]], [[fvec2]]
+  // CHECK: [[atan:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 17, <[[NUM]] x float> [[div]]) ; Atan(value)
+  // CHECK: [[add:%.*]] = fadd fast <[[NUM]] x float> [[atan]], <float 0x
+  // CHECK: [[sub:%.*]] = fadd fast <[[NUM]] x float> [[atan]], <float 0x
+  // CHECK: [[xlt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec2]], zeroinitializer
+  // CHECK: [[xeq:%.*]] = fcmp fast oeq <[[NUM]] x float> [[fvec2]], zeroinitializer
+  // CHECK: [[yge:%.*]] = fcmp fast oge <[[NUM]] x float> [[fvec3]], zeroinitializer
+  // CHECK: [[ylt:%.*]] = fcmp fast olt <[[NUM]] x float> [[fvec3]], zeroinitializer
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xlt]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> [[add]], <[[NUM]] x float>
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xlt]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> [[sub]], <[[NUM]] x float>
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[ylt]], [[xeq]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> <float 0x
+  // CHECK: [[and:%.*]] = and <[[NUM]] x i1> [[yge]], [[xeq]]
+  // CHECK: select <[[NUM]] x i1> [[and]], <[[NUM]] x float> <float 0x
+  fRes += atan2(fVec3, fVec2);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x half> [[hvec2]], [[hvec3]]
+  // CHECK: [[ndiv:%.*]] = fsub fast <[[NUM]] x half> {{.*}}, [[div]]
+  // CHECK: [[cmp:%.*]] = fcmp fast oge <[[NUM]] x half> [[div]], [[ndiv]]
+  // CHECK: [[abs:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 6, <[[NUM]] x half> [[div]]) ; FAbs(value)
+  // CHECK: [[frc:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 22, <[[NUM]] x half> [[abs]]) ; Frc(value)
+  // CHECK: [[nfrc:%.*]] = fsub fast <[[NUM]] x half> {{.*}}, [[frc]]
+  // CHECK: [[rfrc:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x half> [[frc]], <[[NUM]] x half> [[nfrc]]
+  // CHECK: fmul fast <[[NUM]] x half> [[rfrc]], [[hvec3]]
+  hRes += fmod(hVec2, hVec3);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[div:%.*]] = fdiv fast <[[NUM]] x float> [[fvec2]], [[fvec3]]
+  // CHECK: [[ndiv:%.*]] = fsub fast <[[NUM]] x float> {{.*}}, [[div]]
+  // CHECK: [[cmp:%.*]] = fcmp fast oge <[[NUM]] x float> [[div]], [[ndiv]]
+  // CHECK: [[abs:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 6, <[[NUM]] x float> [[div]]) ; FAbs(value)
+  // CHECK: [[frc:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 22, <[[NUM]] x float> [[abs]]) ; Frc(value)
+  // CHECK: [[nfrc:%.*]] = fsub fast <[[NUM]] x float> {{.*}}, [[frc]]
+  // CHECK: [[rfrc:%.*]] = select <[[NUM]] x i1> [[cmp]], <[[NUM]] x float> [[frc]], <[[NUM]] x float> [[nfrc]]
+  // CHECK: fmul fast <[[NUM]] x float> [[rfrc]], [[fvec3]]
+  fRes += fmod(fVec2, fVec3);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[exp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[hvec2]]) ; Exp(value)
+  // CHECK: fmul fast <[[NUM]] x half> [[exp]], [[hvec1]]
+  hRes += ldexp(hVec1, hVec2);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: [[exp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[fvec2]]) ; Exp(value)
+  // CHECK: fmul fast <[[NUM]] x float> [[exp]], [[fvec1]]
+  fRes += ldexp(fVec1, fVec2);
+
   vector<half, NUM> hVal;
   // CHECK-NOT: extractelement
   // CHECK-NOT: insertelement
 
@@ -0,0 +1,51 @@
+// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=isnan    -DOP=8  -DNUM=39 %s | FileCheck %s
+// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=isinf    -DOP=9  -DNUM=38 %s | FileCheck %s
+// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=isfinite -DOP=10 -DNUM=37 %s | FileCheck %s
+
+// Test vector-enabled isspecial unary intrinsics that take float-like parameters
+// and are "trivial" in that they can be implemented with a single call.
+// These return boolean vectors of the same size as their parameter.
+
+RWByteAddressBuffer buf;
+
+// CHECK-DAG: %dx.types.ResRet.[[HTY:v[0-9]*f16]] = type { <[[NUM:[0-9]*]] x half>
+// CHECK-DAG: %dx.types.ResRet.[[FTY:v[0-9]*f32]] = type { <[[NUM]] x float>
+
+[numthreads(8,1,1)]
+void main() {
+
+  // Capture opcode number.
+  // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
+  // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[buf]], i32 999, i32 undef, i32 [[OP:[0-9]*]]
+  buf.Store(999, OP);
+
+  // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[HTY]] @dx.op.rawBufferVectorLoad.[[HTY]](i32 303, %dx.types.Handle [[buf]], i32 0
+  // CHECK: [[hvec:%.*]] = extractvalue %dx.types.ResRet.[[HTY]] [[ld]], 0
+  vector<float16_t, NUM> hVec = buf.Load<vector<float16_t, NUM> >(0);
+
+  // Convergent markers prevent GVN removal of redundant annotateHandle calls.
+  // CONV: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
+
+  // CHECK: [[ld:%.*]] = call %dx.types.ResRet.[[FTY]] @dx.op.rawBufferVectorLoad.[[FTY]](i32 303, %dx.types.Handle [[buf]], i32 1024
+  // CHECK: [[fvec:%.*]] = extractvalue %dx.types.ResRet.[[FTY]] [[ld]], 0
+  vector<float, NUM> fVec = buf.Load<vector<float, NUM> >(1024);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // NOTE: This behavior will change with #7588
+  // CHECK: [[tmp:%.*]] = fpext <[[NUM]] x half> [[hvec]] to <[[NUM]] x float>
+  // CHECK: call <[[NUM]] x i1> @dx.op.isSpecialFloat.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[tmp]])
+  vector<bool, NUM> hRes = FUNC(hVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  // CHECK: call <[[NUM]] x i1> @dx.op.isSpecialFloat.[[FTY]](i32 [[OP]], <[[NUM]] x float> [[fvec]])
+  vector<bool, NUM> fRes = FUNC(fVec);
+
+  // CHECK-NOT: extractelement
+  // CHECK-NOT: insertelement
+  buf.Store<vector<bool, NUM> >(0, hRes);
+  buf.Store<vector<bool, NUM> >(1024, fRes);
+}