diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 805b79491e6ea..6e1301cc9de6f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4352,7 +4352,7 @@ defm ptrauth_init_fini_address_discrimination : OptInCC1FFlag<"ptrauth-init-fini def fenable_matrix : Flag<["-"], "fenable-matrix">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable matrix data type and related builtin functions">, - MarshallingInfoFlag>; + MarshallingInfoFlag, hlsl.KeyPath>; defm raw_string_literals : BoolFOption<"raw-string-literals", LangOpts<"RawStringLiterals">, Default, diff --git a/clang/include/clang/Sema/HLSLExternalSemaSource.h b/clang/include/clang/Sema/HLSLExternalSemaSource.h index 3c7495e66055d..6f4b72045a946 100644 --- a/clang/include/clang/Sema/HLSLExternalSemaSource.h +++ b/clang/include/clang/Sema/HLSLExternalSemaSource.h @@ -28,6 +28,7 @@ class HLSLExternalSemaSource : public ExternalSemaSource { llvm::DenseMap Completions; void defineHLSLVectorAlias(); + void defineHLSLMatrixAlias(); void defineTrivialHLSLTypes(); void defineHLSLTypesWithForwardDeclarations(); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 6d8db5cf4ffd2..09e376156df7f 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -852,10 +852,18 @@ void TypePrinter::printExtVectorAfter(const ExtVectorType *T, raw_ostream &OS) { void TypePrinter::printConstantMatrixBefore(const ConstantMatrixType *T, raw_ostream &OS) { + if (Policy.UseHLSLTypes) + OS << "matrix<"; printBefore(T->getElementType(), OS); - OS << " __attribute__((matrix_type("; + if (!Policy.UseHLSLTypes) + OS << " __attribute__((matrix_type("; + else + OS << ", "; OS << T->getNumRows() << ", " << T->getNumColumns(); - OS << ")))"; + if (!Policy.UseHLSLTypes) + OS << ")))"; + else + OS << ">"; } void TypePrinter::printConstantMatrixAfter(const ConstantMatrixType *T, @@ -865,16 +873,25 @@ void 
TypePrinter::printConstantMatrixAfter(const ConstantMatrixType *T, void TypePrinter::printDependentSizedMatrixBefore( const DependentSizedMatrixType *T, raw_ostream &OS) { + if (Policy.UseHLSLTypes) + OS << "matrix<"; printBefore(T->getElementType(), OS); - OS << " __attribute__((matrix_type("; - if (T->getRowExpr()) { - T->getRowExpr()->printPretty(OS, nullptr, Policy); - } + if (!Policy.UseHLSLTypes) + OS << " __attribute__((matrix_type("; + else + OS << ", "; + + if (Expr *E = T->getRowExpr()) + E->printPretty(OS, nullptr, Policy); OS << ", "; - if (T->getColumnExpr()) { - T->getColumnExpr()->printPretty(OS, nullptr, Policy); - } - OS << ")))"; + if (Expr *E = T->getColumnExpr()) + E->printPretty(OS, nullptr, Policy); + + OS << ", "; + if (!Policy.UseHLSLTypes) + OS << ")))"; + else + OS << ">"; } void TypePrinter::printDependentSizedMatrixAfter( diff --git a/clang/lib/Headers/hlsl/hlsl_basic_types.h b/clang/lib/Headers/hlsl/hlsl_basic_types.h index eff94e0d7f950..b6eeffa2f5e36 100644 --- a/clang/lib/Headers/hlsl/hlsl_basic_types.h +++ b/clang/lib/Headers/hlsl/hlsl_basic_types.h @@ -115,6 +115,238 @@ typedef vector float64_t2; typedef vector float64_t3; typedef vector float64_t4; +#ifdef __HLSL_ENABLE_16_BIT +typedef matrix int16_t1x1; +typedef matrix int16_t1x2; +typedef matrix int16_t1x3; +typedef matrix int16_t1x4; +typedef matrix int16_t2x1; +typedef matrix int16_t2x2; +typedef matrix int16_t2x3; +typedef matrix int16_t2x4; +typedef matrix int16_t3x1; +typedef matrix int16_t3x2; +typedef matrix int16_t3x3; +typedef matrix int16_t3x4; +typedef matrix int16_t4x1; +typedef matrix int16_t4x2; +typedef matrix int16_t4x3; +typedef matrix int16_t4x4; +typedef matrix uint16_t1x1; +typedef matrix uint16_t1x2; +typedef matrix uint16_t1x3; +typedef matrix uint16_t1x4; +typedef matrix uint16_t2x1; +typedef matrix uint16_t2x2; +typedef matrix uint16_t2x3; +typedef matrix uint16_t2x4; +typedef matrix uint16_t3x1; +typedef matrix uint16_t3x2; +typedef matrix uint16_t3x3; 
+typedef matrix uint16_t3x4; +typedef matrix uint16_t4x1; +typedef matrix uint16_t4x2; +typedef matrix uint16_t4x3; +typedef matrix uint16_t4x4; +#endif +typedef matrix int1x1; +typedef matrix int1x2; +typedef matrix int1x3; +typedef matrix int1x4; +typedef matrix int2x1; +typedef matrix int2x2; +typedef matrix int2x3; +typedef matrix int2x4; +typedef matrix int3x1; +typedef matrix int3x2; +typedef matrix int3x3; +typedef matrix int3x4; +typedef matrix int4x1; +typedef matrix int4x2; +typedef matrix int4x3; +typedef matrix int4x4; +typedef matrix uint1x1; +typedef matrix uint1x2; +typedef matrix uint1x3; +typedef matrix uint1x4; +typedef matrix uint2x1; +typedef matrix uint2x2; +typedef matrix uint2x3; +typedef matrix uint2x4; +typedef matrix uint3x1; +typedef matrix uint3x2; +typedef matrix uint3x3; +typedef matrix uint3x4; +typedef matrix uint4x1; +typedef matrix uint4x2; +typedef matrix uint4x3; +typedef matrix uint4x4; +typedef matrix int32_t1x1; +typedef matrix int32_t1x2; +typedef matrix int32_t1x3; +typedef matrix int32_t1x4; +typedef matrix int32_t2x1; +typedef matrix int32_t2x2; +typedef matrix int32_t2x3; +typedef matrix int32_t2x4; +typedef matrix int32_t3x1; +typedef matrix int32_t3x2; +typedef matrix int32_t3x3; +typedef matrix int32_t3x4; +typedef matrix int32_t4x1; +typedef matrix int32_t4x2; +typedef matrix int32_t4x3; +typedef matrix int32_t4x4; +typedef matrix uint32_t1x1; +typedef matrix uint32_t1x2; +typedef matrix uint32_t1x3; +typedef matrix uint32_t1x4; +typedef matrix uint32_t2x1; +typedef matrix uint32_t2x2; +typedef matrix uint32_t2x3; +typedef matrix uint32_t2x4; +typedef matrix uint32_t3x1; +typedef matrix uint32_t3x2; +typedef matrix uint32_t3x3; +typedef matrix uint32_t3x4; +typedef matrix uint32_t4x1; +typedef matrix uint32_t4x2; +typedef matrix uint32_t4x3; +typedef matrix uint32_t4x4; +typedef matrix int64_t1x1; +typedef matrix int64_t1x2; +typedef matrix int64_t1x3; +typedef matrix int64_t1x4; +typedef matrix int64_t2x1; +typedef 
matrix int64_t2x2; +typedef matrix int64_t2x3; +typedef matrix int64_t2x4; +typedef matrix int64_t3x1; +typedef matrix int64_t3x2; +typedef matrix int64_t3x3; +typedef matrix int64_t3x4; +typedef matrix int64_t4x1; +typedef matrix int64_t4x2; +typedef matrix int64_t4x3; +typedef matrix int64_t4x4; +typedef matrix uint64_t1x1; +typedef matrix uint64_t1x2; +typedef matrix uint64_t1x3; +typedef matrix uint64_t1x4; +typedef matrix uint64_t2x1; +typedef matrix uint64_t2x2; +typedef matrix uint64_t2x3; +typedef matrix uint64_t2x4; +typedef matrix uint64_t3x1; +typedef matrix uint64_t3x2; +typedef matrix uint64_t3x3; +typedef matrix uint64_t3x4; +typedef matrix uint64_t4x1; +typedef matrix uint64_t4x2; +typedef matrix uint64_t4x3; +typedef matrix uint64_t4x4; + +typedef matrix half1x1; +typedef matrix half1x2; +typedef matrix half1x3; +typedef matrix half1x4; +typedef matrix half2x1; +typedef matrix half2x2; +typedef matrix half2x3; +typedef matrix half2x4; +typedef matrix half3x1; +typedef matrix half3x2; +typedef matrix half3x3; +typedef matrix half3x4; +typedef matrix half4x1; +typedef matrix half4x2; +typedef matrix half4x3; +typedef matrix half4x4; +typedef matrix float1x1; +typedef matrix float1x2; +typedef matrix float1x3; +typedef matrix float1x4; +typedef matrix float2x1; +typedef matrix float2x2; +typedef matrix float2x3; +typedef matrix float2x4; +typedef matrix float3x1; +typedef matrix float3x2; +typedef matrix float3x3; +typedef matrix float3x4; +typedef matrix float4x1; +typedef matrix float4x2; +typedef matrix float4x3; +typedef matrix float4x4; +typedef matrix double1x1; +typedef matrix double1x2; +typedef matrix double1x3; +typedef matrix double1x4; +typedef matrix double2x1; +typedef matrix double2x2; +typedef matrix double2x3; +typedef matrix double2x4; +typedef matrix double3x1; +typedef matrix double3x2; +typedef matrix double3x3; +typedef matrix double3x4; +typedef matrix double4x1; +typedef matrix double4x2; +typedef matrix double4x3; +typedef 
matrix double4x4; + +#ifdef __HLSL_ENABLE_16_BIT +typedef matrix float16_t1x1; +typedef matrix float16_t1x2; +typedef matrix float16_t1x3; +typedef matrix float16_t1x4; +typedef matrix float16_t2x1; +typedef matrix float16_t2x2; +typedef matrix float16_t2x3; +typedef matrix float16_t2x4; +typedef matrix float16_t3x1; +typedef matrix float16_t3x2; +typedef matrix float16_t3x3; +typedef matrix float16_t3x4; +typedef matrix float16_t4x1; +typedef matrix float16_t4x2; +typedef matrix float16_t4x3; +typedef matrix float16_t4x4; +#endif + +typedef matrix float32_t1x1; +typedef matrix float32_t1x2; +typedef matrix float32_t1x3; +typedef matrix float32_t1x4; +typedef matrix float32_t2x1; +typedef matrix float32_t2x2; +typedef matrix float32_t2x3; +typedef matrix float32_t2x4; +typedef matrix float32_t3x1; +typedef matrix float32_t3x2; +typedef matrix float32_t3x3; +typedef matrix float32_t3x4; +typedef matrix float32_t4x1; +typedef matrix float32_t4x2; +typedef matrix float32_t4x3; +typedef matrix float32_t4x4; +typedef matrix float64_t1x1; +typedef matrix float64_t1x2; +typedef matrix float64_t1x3; +typedef matrix float64_t1x4; +typedef matrix float64_t2x1; +typedef matrix float64_t2x2; +typedef matrix float64_t2x3; +typedef matrix float64_t2x4; +typedef matrix float64_t3x1; +typedef matrix float64_t3x2; +typedef matrix float64_t3x3; +typedef matrix float64_t3x4; +typedef matrix float64_t4x1; +typedef matrix float64_t4x2; +typedef matrix float64_t4x3; +typedef matrix float64_t4x4; + } // namespace hlsl #endif //_HLSL_HLSL_BASIC_TYPES_H_ diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 6eb24c578f602..bd621661bb4bf 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -459,8 +459,81 @@ void HLSLExternalSemaSource::defineHLSLVectorAlias() { HLSLNamespace->addDecl(Template); } +void HLSLExternalSemaSource::defineHLSLMatrixAlias() { + ASTContext &AST = 
SemaPtr->getASTContext(); + + llvm::SmallVector TemplateParams; + + auto *TypeParam = TemplateTypeParmDecl::Create( + AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 0, + &AST.Idents.get("element", tok::TokenKind::identifier), false, false); + TypeParam->setDefaultArgument( + AST, SemaPtr->getTrivialTemplateArgumentLoc( + TemplateArgument(AST.FloatTy), QualType(), SourceLocation())); + + TemplateParams.emplace_back(TypeParam); + + // these should be 64 bit to be consistent with other clang matrices. + auto *RowsParam = NonTypeTemplateParmDecl::Create( + AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 1, + &AST.Idents.get("rows_count", tok::TokenKind::identifier), AST.IntTy, + false, AST.getTrivialTypeSourceInfo(AST.IntTy)); + llvm::APInt RVal(AST.getIntWidth(AST.IntTy), 4); + TemplateArgument RDefault(AST, llvm::APSInt(std::move(RVal)), AST.IntTy, + /*IsDefaulted=*/true); + RowsParam->setDefaultArgument( + AST, SemaPtr->getTrivialTemplateArgumentLoc(RDefault, AST.IntTy, + SourceLocation(), RowsParam)); + TemplateParams.emplace_back(RowsParam); + + auto *ColsParam = NonTypeTemplateParmDecl::Create( + AST, HLSLNamespace, SourceLocation(), SourceLocation(), 0, 2, + &AST.Idents.get("cols_count", tok::TokenKind::identifier), AST.IntTy, + false, AST.getTrivialTypeSourceInfo(AST.IntTy)); + llvm::APInt CVal(AST.getIntWidth(AST.IntTy), 4); + TemplateArgument CDefault(AST, llvm::APSInt(std::move(CVal)), AST.IntTy, + /*IsDefaulted=*/true); + ColsParam->setDefaultArgument( + AST, SemaPtr->getTrivialTemplateArgumentLoc(CDefault, AST.IntTy, + SourceLocation(), ColsParam)); + TemplateParams.emplace_back(RowsParam); + + auto *ParamList = + TemplateParameterList::Create(AST, SourceLocation(), SourceLocation(), + TemplateParams, SourceLocation(), nullptr); + + IdentifierInfo &II = AST.Idents.get("matrix", tok::TokenKind::identifier); + + QualType AliasType = AST.getDependentSizedMatrixType( + AST.getTemplateTypeParmType(0, 0, false, TypeParam), + 
DeclRefExpr::Create( + AST, NestedNameSpecifierLoc(), SourceLocation(), RowsParam, false, + DeclarationNameInfo(RowsParam->getDeclName(), SourceLocation()), + AST.IntTy, VK_LValue), + DeclRefExpr::Create( + AST, NestedNameSpecifierLoc(), SourceLocation(), ColsParam, false, + DeclarationNameInfo(ColsParam->getDeclName(), SourceLocation()), + AST.IntTy, VK_LValue), + SourceLocation()); + + auto *Record = TypeAliasDecl::Create(AST, HLSLNamespace, SourceLocation(), + SourceLocation(), &II, + AST.getTrivialTypeSourceInfo(AliasType)); + Record->setImplicit(true); + + auto *Template = + TypeAliasTemplateDecl::Create(AST, HLSLNamespace, SourceLocation(), + Record->getIdentifier(), ParamList, Record); + + Record->setDescribedAliasTemplate(Template); + Template->setImplicit(true); + Template->setLexicalDeclContext(Record->getDeclContext()); + HLSLNamespace->addDecl(Template); +} + void HLSLExternalSemaSource::defineTrivialHLSLTypes() { defineHLSLVectorAlias(); + defineHLSLMatrixAlias(); } /// Set up common members and attributes for buffer types diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index e526a11973975..1471b402c1158 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2447,7 +2447,7 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize, QualType Sema::BuildMatrixType(QualType ElementTy, Expr *NumRows, Expr *NumCols, SourceLocation AttrLoc) { - assert(Context.getLangOpts().MatrixTypes && + assert(getLangOpts().MatrixTypes && "Should never build a matrix type when it is disabled"); // Check element type, if it is not dependent. 
diff --git a/clang/test/AST/HLSL/matrix-alias.hlsl b/clang/test/AST/HLSL/matrix-alias.hlsl new file mode 100644 index 0000000000000..307b317998f85 --- /dev/null +++ b/clang/test/AST/HLSL/matrix-alias.hlsl @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -ast-dump -o - %s | FileCheck %s + +// Test that matrix aliases are set up properly for HLSL + +// CHECK: NamespaceDecl 0x{{[0-9a-fA-F]+}} <> implicit hlsl +// CHECK-NEXT: TypeAliasTemplateDecl 0x{{[0-9a-fA-F]+}} <> implicit vector +// CHECK-NEXT: TemplateTypeParmDecl 0x{{[0-9a-fA-F]+}} <> class depth 0 index 0 element +// CHECK-NEXT: TemplateArgument type 'float' +// CHECK-NEXT: BuiltinType 0x{{[0-9a-fA-F]+}} 'float' +// CHECK-NEXT: NonTypeTemplateParmDecl 0x{{[0-9a-fA-F]+}} <> 'int' depth 0 index 1 element_count +// CHECK-NEXT: TemplateArgument expr +// CHECK-NEXT: IntegerLiteral 0x{{[0-9a-fA-F]+}} <> 'int' 4 +// CHECK-NEXT: TypeAliasDecl 0x{{[0-9a-fA-F]+}} <> implicit vector 'vector' +// CHECK-NEXT: DependentSizedExtVectorType 0x{{[0-9a-fA-F]+}} 'vector' dependent +// CHECK-NEXT: TemplateTypeParmType 0x{{[0-9a-fA-F]+}} 'element' dependent depth 0 index 0 +// CHECK-NEXT: TemplateTypeParm 0x{{[0-9a-fA-F]+}} 'element' +// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} <> 'int' lvalue +// CHECK-SAME: NonTypeTemplateParm 0x{{[0-9a-fA-F]+}} 'element_count' 'int' + +// Make sure we got a using directive at the end. +// CHECK: UsingDirectiveDecl 0x{{[0-9a-fA-F]+}} <> Namespace 0x{{[0-9a-fA-F]+}} 'hlsl' + +[numthreads(1,1,1)] +int entry() { + // Verify that the alias is generated inside the hlsl namespace. + hlsl::matrix Mat2x2; + + // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:29 Mat2x2 'hlsl::matrix' + + // Verify that you don't need to specify the namespace. + matrix Vec2x2a; + + // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:21 Vec2x2a 'matrix' + + // Build a bigger matrix. 
+ matrix Mat4x4; + + // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:24 Mat4x4 'matrix' + + // Verify that the implicit arguments generate the correct type. + matrix<> ImpMat4x4; + + // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:12 ImpMat4x4 'matrix<>':'matrix' + return 1; +} diff --git a/clang/test/AST/HLSL/vector-alias.hlsl b/clang/test/AST/HLSL/vector-alias.hlsl index 3d112ee1b2230..e7c72d51a6338 100644 --- a/clang/test/AST/HLSL/vector-alias.hlsl +++ b/clang/test/AST/HLSL/vector-alias.hlsl @@ -13,7 +13,7 @@ // CHECK-NEXT: TemplateTypeParmType 0x{{[0-9a-fA-F]+}} 'element' dependent depth 0 index 0 // CHECK-NEXT: TemplateTypeParm 0x{{[0-9a-fA-F]+}} 'element' // CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} <> 'int' lvalue -// NonTypeTemplateParm 0x{{[0-9a-fA-F]+}} 'element_count' 'int' +// CHECK-SAME: NonTypeTemplateParm 0x{{[0-9a-fA-F]+}} 'element_count' 'int' // Make sure we got a using directive at the end. // CHECK: UsingDirectiveDecl 0x{{[0-9a-fA-F]+}} <> Namespace 0x{{[0-9a-fA-F]+}} 'hlsl' diff --git a/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-cast-template.hlsl b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-cast-template.hlsl new file mode 100644 index 0000000000000..a8b56e86cfa50 --- /dev/null +++ b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-cast-template.hlsl @@ -0,0 +1,349 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s + + +template +using matrix_3_3 = matrix; + +template +using matrix_4_4 = matrix; + +// CHECK-LABEL: define {{.*}}CastCharMatrixToIntCStyle +void CastCharMatrixToIntCStyle() { + // CHECK: [[C:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x 
i16> [[C]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + + matrix_4_4 c; + matrix_4_4 i; + i = (matrix_4_4)c; +} + +// CHECK-LABEL: define {{.*}}CastCharMatrixToIntStaticCast +void CastCharMatrixToIntStaticCast() { + // CHECK: [[C:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i16> [[C]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + + matrix_4_4 c; + matrix_4_4 i; + i = static_cast>(c); +} + +// CHECK-LABEL: define {{.*}}CastCharMatrixToUnsignedIntCStyle +void CastCharMatrixToUnsignedIntCStyle() { + // CHECK: [[C:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i16> [[C]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 c; + matrix_4_4 u; + u = (matrix_4_4)c; +} + +// CHECK-LABEL: define {{.*}}CastCharMatrixToUnsignedIntStaticCast +void CastCharMatrixToUnsignedIntStaticCast() { + // CHECK: [[C:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i16> [[C]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 c; + matrix_4_4 u; + u = static_cast>(c); +} + +// CHECK-LABEL: define {{.*}}CastUnsignedLongIntMatrixToShortCStyle +void CastUnsignedLongIntMatrixToShortCStyle() { + // CHECK: [[U:%.*]] = load <16 x i64>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i64> {{.*}} to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + matrix_4_4 u; + matrix_4_4 s; + s = (matrix_4_4)u; +} + +// CHECK-LABEL: define {{.*}}CastUnsignedLongIntMatrixToShortStaticCast +void CastUnsignedLongIntMatrixToShortStaticCast() { + // CHECK: [[U:%.*]] = load <16 x i64>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i64> {{.*}} to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, 
align 2 + // CHECK-NEXT: ret void + + matrix_4_4 u; + matrix_4_4 s; + s = static_cast>(u); +} + +// CHECK-LABEL: define {{.*}}CastIntMatrixToShortCStyle +void CastIntMatrixToShortCStyle() { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i32> [[I]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + matrix_4_4 i; + matrix_4_4 s; + s = (matrix_4_4)i; +} + +// CHECK-LABEL: define {{.*}}CastIntMatrixToShortStaticCast +void CastIntMatrixToShortStaticCast() { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i32> [[I]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + matrix_4_4 i; + matrix_4_4 s; + s = static_cast>(i); +} + +// CHECK-LABEL: define {{.*}}CastIntMatrixToFloatCStyle +void CastIntMatrixToFloatCStyle() { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV]] = sitofp <16 x i32> {{.*}} to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 i; + matrix_4_4 f; + f = (matrix_4_4)i; +} + +// CHECK-LABEL: define {{.*}}CastIntMatrixToFloatStaticCast +void CastIntMatrixToFloatStaticCast() { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV]] = sitofp <16 x i32> {{.*}} to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 i; + matrix_4_4 f; + f = static_cast>(i); +} + +// CHECK-LABEL: define {{.*}}CastUnsignedIntMatrixToFloatCStyle +void CastUnsignedIntMatrixToFloatCStyle() { + // CHECK: [[U:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = uitofp <16 x i16> [[U]] to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 u; + matrix_4_4 f; + f = 
(matrix_4_4)u; +} + +// CHECK-LABEL: define {{.*}}CastUnsignedIntMatrixToFloatStaticCast +void CastUnsignedIntMatrixToFloatStaticCast() { + // CHECK: [[U:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = uitofp <16 x i16> [[U]] to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 u; + matrix_4_4 f; + f = static_cast>(u); +} + +// CHECK-LABEL: define {{.*}}CastDoubleMatrixToIntCStyle +void CastDoubleMatrixToIntCStyle() { + // CHECK: [[D:%.*]] = load <16 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = fptosi <16 x double> [[D]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 d; + matrix_4_4 i; + i = (matrix_4_4)d; +} + +// CHECK-LABEL: define {{.*}}CastDoubleMatrixToIntStaticCast +void CastDoubleMatrixToIntStaticCast() { + // CHECK: [[D:%.*]] = load <16 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = fptosi <16 x double> [[D]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 d; + matrix_4_4 i; + i = static_cast>(d); +} + +// CHECK-LABEL: define {{.*}}CastFloatMatrixToUnsignedShortIntCStyle +void CastFloatMatrixToUnsignedShortIntCStyle() { + // CHECK: [[F:%.*]] = load <16 x float>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV:%.*]] = fptoui <16 x float> [[F]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + matrix_4_4 f; + matrix_4_4 i; + i = (matrix_4_4)f; +} + +// CHECK-LABEL: define {{.*}}CastFloatMatrixToUnsignedShortIntStaticCast +void CastFloatMatrixToUnsignedShortIntStaticCast() { + // CHECK: [[F:%.*]] = load <16 x float>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV:%.*]] = fptoui <16 x float> [[F]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + matrix_4_4 f; + matrix_4_4 i; + i = 
static_cast>(f); +} + +// CHECK-LABEL: define {{.*}}CastDoubleMatrixToFloatCStyle +void CastDoubleMatrixToFloatCStyle() { + // CHECK: [[D:%.*]] = load <16 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = fptrunc <16 x double> [[D]] to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 d; + matrix_4_4 f; + f = (matrix_4_4)d; +} + +// CHECK-LABEL: define {{.*}}CastDoubleMatrixToFloatStaticCast +void CastDoubleMatrixToFloatStaticCast() { + // CHECK: [[D:%.*]] = load <16 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = fptrunc <16 x double> [[D]] to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 d; + matrix_4_4 f; + f = static_cast>(d); +} + +// CHECK-LABEL: define {{.*}}CastUnsignedShortIntToUnsignedIntCStyle +void CastUnsignedShortIntToUnsignedIntCStyle() { + // CHECK: [[S:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = zext <16 x i16> [[S]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 s; + matrix_4_4 i; + i = (matrix_4_4)s; +} + +// CHECK-LABEL: define {{.*}}CastUnsignedShortIntToUnsignedIntStaticCast +void CastUnsignedShortIntToUnsignedIntStaticCast() { + // CHECK: [[S:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = zext <16 x i16> [[S]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 s; + matrix_4_4 i; + i = static_cast>(s); +} + +// CHECK-LABEL: define {{.*}}CastUnsignedLongIntToUnsignedShortIntCStyle +void CastUnsignedLongIntToUnsignedShortIntCStyle() { + // CHECK: [[L:%.*]] = load <16 x i64>, ptr %l, align 8 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i64> [[L]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + matrix_4_4 l; + matrix_4_4 
s; + s = (matrix_4_4)l; +} + +// CHECK-LABEL: define {{.*}}CastUnsignedLongIntToUnsignedShortIntStaticCast +void CastUnsignedLongIntToUnsignedShortIntStaticCast() { + // CHECK: [[L:%.*]] = load <16 x i64>, ptr %l, align 8 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i64> [[L]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + matrix_4_4 l; + matrix_4_4 s; + s = static_cast>(l); +} + +// CHECK-LABEL: define {{.*}}CastUnsignedShortIntToIntCStyle +void CastUnsignedShortIntToIntCStyle() { + // CHECK: [[U:%.*]] = load <16 x i16>, ptr %u, align 2 + // CHECK-NEXT: [[CONV:%.*]] = zext <16 x i16> [[U]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 u; + matrix_4_4 i; + i = (matrix_4_4)u; +} + +// CHECK-LABEL: define {{.*}}CastUnsignedShortIntToIntStaticCast +void CastUnsignedShortIntToIntStaticCast() { + // CHECK: [[U:%.*]] = load <16 x i16>, ptr %u, align 2 + // CHECK-NEXT: [[CONV:%.*]] = zext <16 x i16> [[U]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + matrix_4_4 u; + matrix_4_4 i; + i = static_cast>(u); +} + +// CHECK-LABEL: define {{.*}}CastIntToUnsignedLongIntCStyle +void CastIntToUnsignedLongIntCStyle() { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr %i, align 4 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i32> [[I]] to <16 x i64> + // CHECK-NEXT: store <16 x i64> [[CONV]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + + matrix_4_4 i; + matrix_4_4 u; + u = (matrix_4_4)i; +} + +// CHECK-LABEL: define {{.*}}CastIntToUnsignedLongIntStaticCast +void CastIntToUnsignedLongIntStaticCast() { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr %i, align 4 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i32> [[I]] to <16 x i64> + // CHECK-NEXT: store <16 x i64> [[CONV]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + + matrix_4_4 i; + matrix_4_4 u; + u = static_cast>(i); +} + +class Foo { + int 
x[10]; + + Foo(matrix_4_4 x); +}; + +// These require mangling. DXIL uses MicrosoftMangle which doesn't support mangling matrices yet. +// CHECK-LABEL: define {{.*}}class_constructor_matrix_ty +Foo class_constructor_matrix_ty(matrix_4_4 m) { + // CHECK: [[M:%.*]] = load <16 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: call{{.*}} void @_ZN3FooC1Eu11matrix_typeIL{{[mj]}}4EL{{[mj]}}4EiE(ptr noundef nonnull align 4 dereferenceable(40) %agg.result, <16 x i32> noundef [[M]]) + // CHECK-NEXT: ret void + + return Foo(m); +} + +struct Bar { + float x[10]; + Bar(matrix_3_3 x); +}; + +// CHECK-LABEL: define {{.*}}struct_constructor_matrix_ty +Bar struct_constructor_matrix_ty(matrix_3_3 m) { + // CHECK: [[M:%.*]] = load <9 x float>, ptr {{.*}}, align 4 + // CHECK-NEXT: call{{.*}} void @_ZN3BarC1Eu11matrix_typeIL{{[mj]}}3EL{{[mj]}}3EfE(ptr noundef nonnull align 4 dereferenceable(40) %agg.result, <9 x float> noundef [[M]]) + // CHECK-NEXT: ret void + + return Bar(m); +} diff --git a/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-cast.hlsl b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-cast.hlsl new file mode 100644 index 0000000000000..a902b6892e2ba --- /dev/null +++ b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-cast.hlsl @@ -0,0 +1,135 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s + +// Test explicit matrix casts. +// This is adapted to HLSL from CodeGen/matrix-cast.c. 
+ +// CHECK-LABEL: define {{.*}}cast_int16_matrix_to_int +void cast_int16_matrix_to_int( int16_t4x4 c, int4x4 i) { + // CHECK: [[C:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i16> [[C]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + i = (int4x4)c; +} + +// CHECK-LABEL: define {{.*}}cast_int16_matrix_to_uint +void cast_int16_matrix_to_uint( int16_t4x4 c, uint4x4 u) { + // CHECK: [[C:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i16> [[C]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + u = (uint4x4)c; +} + +// CHECK-LABEL: define {{.*}}cast_uint64_matrix_to_int16 +void cast_uint64_matrix_to_int16( uint64_t4x4 u, int16_t4x4 s) { + // CHECK: [[U:%.*]] = load <16 x i64>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i64> [[U]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + s = (int16_t4x4)u; +} + +// CHECK-LABEL: define {{.*}}cast_int_matrix_to_int16 +void cast_int_matrix_to_int16( int4x4 i, int16_t4x4 s) { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i32> [[I]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + s = (int16_t4x4)i; +} + +// CHECK-LABEL: define {{.*}}cast_int_matrix_to_float +void cast_int_matrix_to_float( int4x4 i, float4x4 f) { + // CHECK: [[I:%.*]] = load <16 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV:%.*]] = sitofp <16 x i32> [[I]] to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + f = (float4x4)i; +} + +// CHECK-LABEL: define {{.*}}cast_uint_matrix_to_float +void cast_uint_matrix_to_float( uint16_t4x4 u, float4x4 f) { + // CHECK: [[U:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // 
CHECK-NEXT: [[CONV:%.*]] = uitofp <16 x i16> [[U]] to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + f = (float4x4)u; +} + +// CHECK-LABEL: define {{.*}}cast_double_matrix_to_int +void cast_double_matrix_to_int( double4x4 d, int4x4 i) { + // CHECK: [[D:%.*]] = load <16 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = fptosi <16 x double> [[D]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + i = (int4x4)d; +} + +// CHECK-LABEL: define {{.*}}cast_float_matrix_to_uint16 +void cast_float_matrix_to_uint16( float4x4 f, uint16_t4x4 i) { + // CHECK: [[F:%.*]] = load <16 x float>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[CONV:%.*]] = fptoui <16 x float> [[F]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + i = (uint16_t4x4)f; +} + +// CHECK-LABEL: define {{.*}}cast_double_matrix_to_float +void cast_double_matrix_to_float( double4x4 d, float4x4 f) { + // CHECK: [[D:%.*]] = load <16 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = fptrunc <16 x double> [[D]] to <16 x float> + // CHECK-NEXT: store <16 x float> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + f = (float4x4)d; +} + +// CHECK-LABEL: define {{.*}}cast_uint16_to_uint +void cast_uint16_to_uint( uint16_t4x4 s, uint4x4 i) { + // CHECK: [[S:%.*]] = load <16 x i16>, ptr {{.*}}, align 2 + // CHECK-NEXT: [[CONV:%.*]] = zext <16 x i16> [[S]] to <16 x i32> + // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + i = (uint4x4)s; +} + +// CHECK-LABEL: define {{.*}}cast_uint64_to_uint16 +void cast_uint64_to_uint16( uint64_t4x4 l, uint16_t4x4 s) { + // CHECK: [[L:%.*]] = load <16 x i64>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i64> [[L]] to <16 x i16> + // CHECK-NEXT: store <16 x i16> [[CONV]], ptr {{.*}}, align 2 + // CHECK-NEXT: ret void + + s = 
(uint16_t4x4)l;
+}
+
+// CHECK-LABEL: define {{.*}}cast_uint16_to_int
+void cast_uint16_to_int( uint16_t4x4 u, int4x4 i) {
+  // CHECK: [[U:%.*]] = load <16 x i16>, ptr {{.*}}, align 2
+  // CHECK-NEXT: [[CONV:%.*]] = zext <16 x i16> [[U]] to <16 x i32>
+  // CHECK-NEXT: store <16 x i32> [[CONV]], ptr {{.*}}, align 4
+  // CHECK-NEXT: ret void
+
+  i = (int4x4)u;
+}
+
+// CHECK-LABEL: define {{.*}}cast_int_to_uint64
+void cast_int_to_uint64( int4x4 i, uint64_t4x4 u) {
+  // CHECK: [[I:%.*]] = load <16 x i32>, ptr {{.*}}, align 4
+  // CHECK-NEXT: [[CONV:%.*]] = sext <16 x i32> [[I]] to <16 x i64>
+  // CHECK-NEXT: store <16 x i64> [[CONV]], ptr {{.*}}, align 8
+  // CHECK-NEXT: ret void
+
+  u = (uint64_t4x4)i;
+}
diff --git a/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-transpose-template.hlsl b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-transpose-template.hlsl
new file mode 100644
index 0000000000000..dd77fecbbe0d5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-transpose-template.hlsl
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -no-enable-noundef-analysis -triple spirv-unknown-vulkan-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+// RUN: %clang_cc1 -no-enable-noundef-analysis -triple dxil-pc-shadermodel6.3-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// Test the matrix type transpose builtin.
+
+template <typename EltTy, unsigned Rows, unsigned Cols>
+using matrix_t = matrix<EltTy, Rows, Cols>;
+
+template <typename EltTy, unsigned Rows, unsigned Cols>
+struct MyMatrix {
+  matrix_t<EltTy, Rows, Cols> value;
+};
+
+// Can't test utility function with matrix param without mangling.
+template +MyMatrix transpose(const MyMatrix M) { + MyMatrix Res; + Res.value = __builtin_matrix_transpose(M.value); + return Res; +} + +// CHECK-LABEL: define{{.*}} void @_Z24test_transpose_template1v() +void test_transpose_template1() { + // CHECK: call{{.*}} void @_Z9transposeIiLj3ELj4EE8MyMatrixIT_XT1_EXT0_EES0_IS1_XT0_EXT1_EE(ptr dead_on_unwind writable sret(%struct.MyMatrix.0) align 4 %M1_t, ptr byval(%struct.MyMatrix) align 4 %agg.tmp) + // CHECK-LABEL: define{{.*}} void @_Z9transposeIiLj3ELj4EE8MyMatrixIT_XT1_EXT0_EES0_IS1_XT0_EXT1_EE( + // CHECK: [[M:%.*]] = load <12 x i32>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[M_T:%.*]] = call <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32> [[M]], i32 3, i32 4) + + MyMatrix M1; + MyMatrix M1_t = transpose(M1); +} + +// CHECK-LABEL: define{{.*}} void @_Z24test_transpose_template2 +void test_transpose_template2(inout MyMatrix M) { + // CHECK: call{{.*}} void @_Z9transposeIdLj3ELj2EE8MyMatrixIT_XT1_EXT0_EES0_IS1_XT0_EXT1_EE(ptr dead_on_unwind writable sret(%struct.MyMatrix.1) align 8 %agg.tmp1, ptr byval(%struct.MyMatrix.2) align 8 %agg.tmp2) + // CHECK-NEXT: call{{.*}} void @_Z9transposeIdLj2ELj3EE8MyMatrixIT_XT1_EXT0_EES0_IS1_XT0_EXT1_EE(ptr dead_on_unwind writable sret(%struct.MyMatrix.2) align 8 %agg.tmp, ptr byval(%struct.MyMatrix.1) align 8 %agg.tmp1) + // CHECK-NEXT: call{{.*}} void @_Z9transposeIdLj3ELj2EE8MyMatrixIT_XT1_EXT0_EES0_IS1_XT0_EXT1_EE(ptr dead_on_unwind writable sret(%struct.MyMatrix.1) align 8 %M2_t, ptr byval(%struct.MyMatrix.2) align 8 %agg.tmp) + + // CHECK-LABEL: define{{.*}} void @_Z9transposeIdLj3ELj2EE8MyMatrixIT_XT1_EXT0_EES0_IS1_XT0_EXT1_EE( + // CHECK: [[M:%.*]] = load <6 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[M_T:%.*]] = call <6 x double> @llvm.matrix.transpose.v6f64(<6 x double> [[M]], i32 3, i32 2) + // CHECK-NEXT: [[RES_ADDR:%.*]] = getelementptr inbounds nuw %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0 + // CHECK-NEXT: store <6 x double> [[M_T]], ptr [[RES_ADDR]], 
align 8 + + // CHECK-LABEL: define{{.*}} void @_Z9transposeIdLj2ELj3EE8MyMatrixIT_XT1_EXT0_EES0_IS1_XT0_EXT1_EE( + // CHECK: [[M:%.*]] = load <6 x double>, ptr {{.*}}, align 8 + // CHECK-NEXT: [[M_T:%.*]] = call <6 x double> @llvm.matrix.transpose.v6f64(<6 x double> [[M]], i32 2, i32 3) + // CHECK-NEXT: [[RES_ADDR:%.*]] = getelementptr inbounds nuw %struct.MyMatrix.2, ptr %agg.result, i32 0, i32 0 + // CHECK-NEXT: store <6 x double> [[M_T]], ptr [[RES_ADDR]], align 8 + + MyMatrix M2_t = transpose(transpose(transpose(M))); +} + +matrix_t get_matrix(); + +// CHECK-LABEL: define{{.*}} void @_Z21test_transpose_rvaluev() +void test_transpose_rvalue() { + // CHECK: [[M_T_ADDR:%.*]] = alloca [9 x float], align 4 + // CHECK-NEXT: [[CALL_RES:%.*]] = call{{.*}} <9 x float> @_Z10get_matrixv() + // CHECK-NEXT: [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], splat (float 2.000000e+00) + // CHECK-NEXT: [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[ADD]], i32 3, i32 3) + // CHECK-NEXT: store <9 x float> [[M_T]], ptr [[M_T_ADDR]], align 4 + matrix_t m_t = __builtin_matrix_transpose(get_matrix() + 2.0); +} + +// CHECK-LABEL: define{{.*}} void @_Z20test_transpose_const +void test_transpose_const(const matrix_t m) { + // CHECK: [[MATRIX:%.*]] = load <9 x float>, ptr {{.*}}, align 4 + // CHECK-NEXT: [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[MATRIX]], i32 3, i32 3) + // CHECK-NEXT: store <9 x float> [[M_T]], ptr %m_t, align 4 + matrix_t m_t = __builtin_matrix_transpose(m); +} + +// TODO: Enable once initialization support is defined and implemented for +// matrix types. 
+// void test_lvalue_conversion() {
+//   constexpr double4x4 m = {};
+//   [] { return __builtin_matrix_transpose(m); }
+//}
+
diff --git a/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-transpose.hlsl b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-transpose.hlsl
new file mode 100644
index 0000000000000..1431a5daf8a01
--- /dev/null
+++ b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-transpose.hlsl
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -no-enable-noundef-analysis -triple spirv-unknown-vulkan-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+// RUN: %clang_cc1 -no-enable-noundef-analysis -triple dxil-pc-shadermodel6.3-compute -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// Tests the matrix type transpose builtin.
+
+// CHECK-LABEL: define {{.*}}transpose_double_4x4
+void transpose_double_4x4(double4x4 a) {
+  // CHECK: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8
+  // CHECK-NEXT: [[TRANS:%.*]] = call <16 x double> @llvm.matrix.transpose.v16f64(<16 x double> [[A]], i32 4, i32 4)
+  // CHECK-NEXT: store <16 x double> [[TRANS]], ptr %a_t, align 8
+
+  double4x4 a_t = __builtin_matrix_transpose(a);
+}
+
+// CHECK-LABEL: define {{.*}}transpose_float_3x2
+void transpose_float_3x2(float3x2 a) {
+  // CHECK: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4
+  // CHECK-NEXT: [[TRANS:%.*]] = call <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> [[A]], i32 3, i32 2)
+  // CHECK-NEXT: store <6 x float> [[TRANS]], ptr %a_t, align 4
+
+  float2x3 a_t = __builtin_matrix_transpose(a);
+}
+
+// CHECK-LABEL: define {{.*}}transpose_int_4x3
+void transpose_int_4x3(int4x3 a) {
+  // CHECK: [[A:%.*]] = load <12 x i32>, ptr {{.*}}, align 4
+  // CHECK-NEXT: [[TRANS:%.*]] = call <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32> [[A]], i32 4, i32 3)
+  // CHECK-NEXT: store <12 x i32> [[TRANS]], ptr %a_t, align 4
+
+  int3x4 a_t = __builtin_matrix_transpose(a);
+}
+
+struct Foo {
+  uint1x4 In;
+  uint4x1 Out;
+};
+
+// CHECK-LABEL: define {{.*}}transpose_struct_member
+void transpose_struct_member(struct Foo F) {
+  // CHECK: [[IN_PTR:%.*]] = getelementptr inbounds nuw %struct.Foo, ptr %F, i32 0, i32 0
+  // CHECK-NEXT: [[M:%.*]] = load <4 x i32>, ptr [[IN_PTR]], align 4
+  // CHECK-NEXT: [[M_T:%.*]] = call <4 x i32> @llvm.matrix.transpose.v4i32(<4 x i32> [[M]], i32 1, i32 4)
+  // CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr inbounds nuw %struct.Foo, ptr %F, i32 0, i32 1
+  // CHECK-NEXT: store <4 x i32> [[M_T]], ptr [[OUT_PTR]], align 4
+
+  F.Out = __builtin_matrix_transpose(F.In);
+}
+
+// CHECK-LABEL: define {{.*}}transpose_transpose_struct_member
+void transpose_transpose_struct_member(struct Foo F) {
+  // CHECK: [[IN_PTR:%.*]] = getelementptr inbounds nuw %struct.Foo, ptr %F, i32 0, i32 0
+  // CHECK-NEXT: [[M:%.*]] = load <4 x i32>, ptr [[IN_PTR]], align 4
+  // CHECK-NEXT: [[M_T:%.*]] = call <4 x i32> @llvm.matrix.transpose.v4i32(<4 x i32> [[M]], i32 1, i32 4)
+  // CHECK-NEXT: [[M_T2:%.*]] = call <4 x i32> @llvm.matrix.transpose.v4i32(<4 x i32> [[M_T]], i32 4, i32 1)
+  // CHECK: [[OUT_PTR:%.*]] = getelementptr inbounds nuw %struct.Foo, ptr %F, i32 0, i32 0
+  // CHECK-NEXT: store <4 x i32> [[M_T2]], ptr [[OUT_PTR]], align 4
+
+  F.In = __builtin_matrix_transpose(__builtin_matrix_transpose(F.In));
+}
+
+double4x4 get_matrix(void);
+
+// CHECK-LABEL: define {{.*}}transpose_rvalue
+void transpose_rvalue(void) {
+  // CHECK: [[M_T_ADDR:%.*]] = alloca [16 x double], align 8
+  // CHECK-NEXT: [[CALL:%.*]] = call{{.*}} <16 x double> @_Z10get_matrixv()
+  // CHECK-NEXT: [[M_T:%.*]] = call <16 x double> @llvm.matrix.transpose.v16f64(<16 x double> [[CALL]], i32 4, i32 4)
+  // CHECK-NEXT: store <16 x double> [[M_T]], ptr [[M_T_ADDR]], align 8
+
+  double4x4 m_t = __builtin_matrix_transpose(get_matrix());
+}
+
+double4x4 global_matrix;
+
+// CHECK-LABEL: define {{.*}}transpose_global
+void transpose_global(void) {
+  // CHECK: [[M_T_ADDR:%.*]] = alloca [16 x double], align 8
+ // CHECK-NEXT: [[GLOBAL_MATRIX:%.*]] = load <16 x double>, ptr @global_matrix, align 8 + // CHECK-NEXT: [[M_T:%.*]] = call <16 x double> @llvm.matrix.transpose.v16f64(<16 x double> [[GLOBAL_MATRIX]], i32 4, i32 4) + // CHECK-NEXT: store <16 x double> [[M_T]], ptr [[M_T_ADDR]], align 8 + + double4x4 m_t = __builtin_matrix_transpose(global_matrix); +} diff --git a/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type-operators-template.hlsl b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type-operators-template.hlsl new file mode 100644 index 0000000000000..7d45039709074 --- /dev/null +++ b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type-operators-template.hlsl @@ -0,0 +1,449 @@ +// RUN: %clang_cc1 -O0 -triple spirv-unknown-vulkan-compute -std=hlsl202y -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV,NOOPT -DIPTR_T=i64 -DALIGN=8 +// RUN: %clang_cc1 -O1 -triple spirv-unknown-vulkan-compute -std=hlsl202y -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV,OPT -DIPTR_T=i64 -DALIGN=8 +// RUN: %clang_cc1 -O0 -triple dxil-pc-shadermodel6.3-compute -std=hlsl202y -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOOPT -DIPTR_T=i32 -DALIGN=4 +// RUN: %clang_cc1 -O1 -triple dxil-pc-shadermodel6.3-compute -std=hlsl202y -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,OPT -DIPTR_T=i32 -DALIGN=4 + +template +struct MyMatrix { + using matrix_t = matrix; + + matrix_t value; +}; + +template +typename MyMatrix::matrix_t add(inout MyMatrix A, inout MyMatrix B) { + return A.value + B.value; +} + +// CHECK-LABEL: define {{.*}}test_add_template +void test_add_template() { + // CHECK: call{{.*}} <8 x float> @_Z3addIfLj2ELj4EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tES2_S2_(ptr noalias 
noundef nonnull align 4 dereferenceable(32) %{{.*}}, ptr noalias noundef nonnull align 4 dereferenceable(32) %{{.*}}) + + // CHECK-LABEL: define{{.*}} <8 x float> @_Z3addIfLj2ELj4EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tES2_S2_( + // NOOPT: [[MAT1:%.*]] = load <8 x float>, ptr {{.*}}, align 4{{$}} + // NOOPT: [[MAT2:%.*]] = load <8 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[MAT1:%.*]] = load <8 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT: [[MAT2:%.*]] = load <8 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fadd <8 x float> [[MAT1]], [[MAT2]] + // CHECK-NEXT: ret <8 x float> [[RES]] + + MyMatrix Mat1; + MyMatrix Mat2; + Mat1.value = add(Mat1, Mat2); +} + +template +typename MyMatrix::matrix_t subtract(inout MyMatrix A, inout MyMatrix B) { + return A.value - B.value; +} + +// CHECK-LABEL: define {{.*}}test_subtract_template +void test_subtract_template() { + // CHECK: call{{.*}} <8 x float> @_Z8subtractIfLj2ELj4EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tES2_S2_(ptr noalias noundef nonnull align 4 dereferenceable(32) %{{.*}}, ptr noalias noundef nonnull align 4 dereferenceable(32) %{{.*}}) + + // CHECK-LABEL: define{{.*}} <8 x float> @_Z8subtractIfLj2ELj4EEN8MyMatrixIT_XT0_EXT1_EE8matrix_tES2_S2_( + // NOOPT: [[MAT1:%.*]] = load <8 x float>, ptr {{.*}}, align 4{{$}} + // NOOPT: [[MAT2:%.*]] = load <8 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[MAT1:%.*]] = load <8 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT: [[MAT2:%.*]] = load <8 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fsub <8 x float> [[MAT1]], [[MAT2]] + // CHECK-NEXT: ret <8 x float> [[RES]] + + MyMatrix Mat1; + MyMatrix Mat2; + Mat1.value = subtract(Mat1, Mat2); +} + +struct DoubleWrapper1 { + int x; + operator double() { + return x; + } +}; + +// CHECK-LABEL: define {{.*}}test_DoubleWrapper1_Sub1 +void test_DoubleWrapper1_Sub1(inout MyMatrix m) { + // NOOPT: [[MATRIX:%.*]] = load <12 x double>, ptr 
{{.*}}, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR:%.*]] = call{{.*}} double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1) + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x double> poison, double [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x double> [[SCALAR_EMBED]], <12 x double> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fsub <12 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK: store <12 x double> [[RES]], ptr {{.*}}, align 8 + + DoubleWrapper1 w1; + w1.x = 10; + m.value = m.value - w1; +} + +// CHECK-LABEL: define {{.*}}test_DoubleWrapper1_Sub2 +void test_DoubleWrapper1_Sub2(inout MyMatrix m) { + // CHECK: [[SCALAR:%.*]] = call{{.*}} double @_ZN14DoubleWrapper1cvdEv(ptr {{[^,]*}} %w1) + // NOOPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x double> poison, double [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x double> [[SCALAR_EMBED]], <12 x double> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fsub <12 x double> [[SCALAR_EMBED1]], [[MATRIX]] + // CHECK: store <12 x double> [[RES]], ptr {{.*}}, align 8 + + DoubleWrapper1 w1; + w1.x = 10; + m.value = w1 - m.value; +} + +struct DoubleWrapper2 { + int x; + operator double() { + return x; + } +}; + +// CHECK-LABEL: define {{.*}}test_DoubleWrapper2_Add1 +void test_DoubleWrapper2_Add1(inout MyMatrix m) { + // NOOPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.+}}, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.+}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK: [[SCALAR:%.*]] = call{{.*}} double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2) + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x double> poison, double [[SCALAR]], 
i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x double> [[SCALAR_EMBED]], <12 x double> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <12 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK: store <12 x double> [[RES]], ptr {{.*}}, align 8 + + DoubleWrapper2 w2; + w2.x = 20; + m.value = m.value + w2; +} + +// CHECK-LABEL: define {{.*}}test_DoubleWrapper2_Add2 +void test_DoubleWrapper2_Add2(inout MyMatrix m) { + // CHECK: [[SCALAR:%.*]] = call{{.*}} double @_ZN14DoubleWrapper2cvdEv(ptr {{[^,]*}} %w2) + // NOOPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x double> poison, double [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x double> [[SCALAR_EMBED]], <12 x double> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <12 x double> [[SCALAR_EMBED1]], [[MATRIX]] + // CHECK: store <12 x double> [[RES]], ptr {{.*}}, align 8 + + DoubleWrapper2 w2; + w2.x = 20; + m.value = w2 + m.value; +} + +struct IntWrapper { + uint16_t x; + operator int() { + return x; + } +}; + +// CHECK-LABEL: define {{.*}}test_IntWrapper_Add +void test_IntWrapper_Add(inout MyMatrix m) { + // NOOPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR:%.*]] = call{{.*}} i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3) + // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x double> poison, double [[SCALAR_FP]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x double> [[SCALAR_EMBED]], <12 x double> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <12 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK: store <12 x 
double> [[RES]], ptr {{.*}}, align 8 + + IntWrapper w3; + w3.x = 13; + m.value = m.value + w3; +} + +// CHECK-LABEL: define {{.*}}test_IntWrapper_Sub +void test_IntWrapper_Sub(inout MyMatrix m) { + // CHECK: [[SCALAR:%.*]] = call{{.*}} i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %w3) + // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double + // NOOPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x double> poison, double [[SCALAR_FP]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x double> [[SCALAR_EMBED]], <12 x double> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fsub <12 x double> [[SCALAR_EMBED1]], [[MATRIX]] + // CHECK: store <12 x double> [[RES]], ptr {{.*}}, align 8 + + IntWrapper w3; + w3.x = 13; + m.value = w3 - m.value; +} + +template +typename MyMatrix::matrix_t multiply(inout MyMatrix A, inout MyMatrix B) { + return A.value * B.value; +} + +// CHECK-LABEL: define {{.*}}test_multiply_template +MyMatrix test_multiply_template(MyMatrix Mat1, + MyMatrix Mat2) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %tmp = alloca %struct.MyMatrix, align 4 + // CHECK-NEXT: %tmp1 = alloca %struct.MyMatrix.2, align 4 + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.[[IPTR_T]](ptr align 4 %tmp, ptr align 4 %Mat1, [[IPTR_T]] 32, i1 false) + // OPT-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %tmp) + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.[[IPTR_T]](ptr align 4 %tmp1, ptr align 4 %Mat2, [[IPTR_T]] 32, i1 false) + // OPT-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %tmp1) + // CHECK-NEXT: [[RES:%.*]] = call{{.*}} <4 x float> @_Z8multiplyIfLj2ELj4ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tES0_IS1_XT0_EXT1_EES0_IS1_XT1_EXT2_EE(ptr noalias noundef nonnull align 4 dereferenceable(32) %tmp, 
ptr noalias noundef nonnull align 4 dereferenceable(32) %tmp1) + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.[[IPTR_T]](ptr align 4 %Mat1, ptr align 4 %tmp, [[IPTR_T]] 32, i1 false) + // OPT-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %tmp) + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.[[IPTR_T]](ptr align 4 %Mat2, ptr align 4 %tmp1, [[IPTR_T]] 32, i1 false) + // OPT-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %tmp1) + // CHECK-NEXT: %value = getelementptr inbounds nuw %struct.MyMatrix.1, ptr %agg.result, i32 0, i32 0 + // CHECK-NEXT: store <4 x float> [[RES]], ptr %value, align 4 + // CHECK-NEXT: ret void + // + // CHECK-LABEL: define{{.*}} <4 x float> @_Z8multiplyIfLj2ELj4ELj2EEN8MyMatrixIT_XT0_EXT2_EE8matrix_tES0_IS1_XT0_EXT1_EES0_IS1_XT1_EXT2_EE( + // NOOPT: [[MAT1:%.*]] = load <8 x float>, ptr {{.*}}, align 4{{$}} + // NOOPT: [[MAT2:%.*]] = load <8 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[MAT1:%.*]] = load <8 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT: [[MAT2:%.*]] = load <8 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.matrix.multiply.v4f32.v8f32.v8f32(<8 x float> [[MAT1]], <8 x float> [[MAT2]], i32 2, i32 4, i32 2) + // CHECK-NEXT: ret <4 x float> [[RES]] + + MyMatrix Res; + Res.value = multiply(Mat1, Mat2); + return Res; +} + +// CHECK-LABEL: define {{.*}}test_IntWrapper_Multiply +void test_IntWrapper_Multiply(inout MyMatrix m, inout IntWrapper w3) { + // CHECK: [[SCALAR:%.*]] = call{{.*}} i32 @_ZN10IntWrappercviEv(ptr noundef {{.*}}) + // CHECK-NEXT: [[SCALAR_FP:%.*]] = sitofp i32 [[SCALAR]] to double + // NOOPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x double> poison, double [[SCALAR_FP]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x double> [[SCALAR_EMBED]], <12 x 
double> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fmul <12 x double> [[SCALAR_EMBED1]], [[MATRIX]] + // CHECK: store <12 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + m.value = w3 * m.value; +} + +template +void insert(inout MyMatrix Mat, EltTy e, unsigned i, unsigned j) { + Mat.value[i][j] = e; +} + +// CHECK-LABEL: define {{.*}}test_insert_template1 +void test_insert_template1(inout MyMatrix Mat, unsigned e, unsigned i, unsigned j) { + // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align [[ALIGN]]{{$}} + // NOOPT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}} + // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // OPT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: call{{.*}} void @_Z6insertIjLj2ELj2EEv8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noalias noundef nonnull align 4 dereferenceable(16) %{{.*}}, i32 noundef [[E]], i32 noundef [[I]], i32 noundef [[J]]) + // CHECK: ret void + // + // CHECK-LABEL: define{{.*}} void @_Z6insertIjLj2ELj2EEv8MyMatrixIT_XT0_EXT1_EES1_jj( + // NOOPT: [[E:%.*]] = load i32, ptr %e.addr, align 4{{$}} + // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // OPT: [[E:%.*]] = load i32, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I:%.*]] = zext i32 {{.*}} to i64 + // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[J:%.*]] = zext i32 {{.*}} to i64 + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[J]], 2 + // 
CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[I]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 4 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <4 x i32> [[MATINS]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + insert(Mat, e, i, j); +} + +// CHECK-LABEL: define {{.*}}test_insert_template2 +void test_insert_template2(inout MyMatrix Mat, float e) { + // NOOPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align [[ALIGN]]{{$}} + // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} + // OPT: [[MAT_ADDR:%.*]] = load ptr, ptr %Mat.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: call{{.*}} void @_Z6insertIfLj3ELj4EEv8MyMatrixIT_XT0_EXT1_EES1_jj(ptr noalias noundef nonnull align 4 dereferenceable(48) %{{.*}}, float noundef [[E]], i32 noundef 2, i32 noundef 3) + // CHECK: ret void + // + // CHECK-LABEL: define{{.*}} void @_Z6insertIfLj3ELj4EEv8MyMatrixIT_XT0_EXT1_EES1_jj( + // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} + // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I:%.*]] = zext i32 {{.*}} to i64 + // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[J:%.*]] = zext i32 {{.*}} to i64 + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[J]], 3 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[I]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 12 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT:%.*]] = load <12 x 
float>, ptr {{.*}}, align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <12 x float> [[MAT]], float [[E]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <12 x float> [[MATINS]], ptr {{.*}}, align 4 + // CHECK-NEXT: ret void + + insert(Mat, e, 2, 3); +} + +template +EltTy extract(inout MyMatrix Mat) { + return Mat.value[1u][0u]; +} + +// CHECK-LABEL: define {{.*}}test_extract_template +int test_extract_template(MyMatrix Mat1) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %tmp = alloca %struct.MyMatrix.5, align 4 + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.[[IPTR_T]](ptr align 4 %tmp, ptr align 4 %Mat1, [[IPTR_T]] 16, i1 false) + // OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %tmp) + // CHECK-NEXT: [[CALL:%.*]] = call{{.*}} i32 @_Z7extractIiLj2ELj2EET_8MyMatrixIS0_XT0_EXT1_EE(ptr noalias noundef nonnull align 4 dereferenceable(16) %tmp) + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.[[IPTR_T]](ptr align 4 %Mat1, ptr align 4 %tmp, [[IPTR_T]] 16, i1 false) + // OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %tmp) + // CHECK-NEXT: ret i32 [[CALL]] + // + // CHECK-LABEL: define{{.*}} i32 @_Z7extractIiLj2ELj2EET_8MyMatrixIS0_XT0_EXT1_EE( + // NOOPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4{{$}} + // OPT: [[MAT:%.*]] = load <4 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <4 x i32> [[MAT]], [[IPTR_T]] 1 + // CHECK-NEXT: ret i32 [[MATEXT]] + + return extract(Mat1); +} + +template +auto matrix_subscript(double4x4 m, R r, C c) -> decltype(m[r][c]) {} + +// CHECK-LABEL: define {{.*}}test_matrix_subscript +double test_matrix_subscript(double4x4 m) { + // NOOPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[CALL:%.*]] = call{{.*}} nonnull align 8 dereferenceable(8) ptr 
@_Z16matrix_subscriptIiiEDTixixfp_fp0_fp1_Eu11matrix_typeIL{{[mj]}}4EL{{[mj]}}4EdET_T0_(<16 x double> noundef [[MAT]], i32 noundef 1, i32 noundef 2) + // NOOPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8{{$}} + // OPT-NEXT: [[RES:%.*]] = load double, ptr [[CALL]], align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: ret double [[RES]] + + return matrix_subscript(m, 1, 2); +} + +// CHECK-LABEL: define {{.*}}test_matrix_subscript_const +const double test_matrix_subscript_const(const double4x4 m) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [16 x double], align 8 + // CHECK-NEXT: store <16 x double> [[M:%.*]], ptr [[M_ADDR]], align 8 + // NOOPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8{{$}} + // OPT: [[NAMELESS1:%.*]] = load <16 x double>, ptr [[M_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[NAMELESS1]], [[IPTR_T]] 4 + // CHECK-NEXT: ret double [[MATEXT]] + + return m[0][1]; +} + +struct UnsignedWrapper { + char x; + operator unsigned() { + return x; + } +}; + +// CHECK-LABEL: define {{.*}}extract_IntWrapper_idx +double extract_IntWrapper_idx(inout double4x4 m, IntWrapper i, UnsignedWrapper j) { + // CHECK: [[I:%.*]] = call{{.*}} i32 @_ZN10IntWrappercviEv(ptr {{[^,]*}} %i) + // CHECK-NEXT: [[I_ADD:%.*]] = add nsw i32 [[I]], 1 + // SPIRV-NEXT: [[I_ADD:%.*]] = sext i32 {{.*}} to i64 + // CHECK-NEXT: [[J:%.*]] = call{{.*}} i32 @_ZN15UnsignedWrappercvjEv(ptr {{[^,]*}} %j) + // CHECK-NEXT: [[J_SUB:%.*]] = sub i32 [[J]], 1 + // SPIRV-NEXT: [[J_SUB:%.*]] = zext i32 {{.*}} to i64 + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[J_SUB]], 4 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[I_ADD]] + // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align [[ALIGN]]{{$}} + // NOOPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8{{$}} + // OPT-NEXT: [[CMP:%.*]] = 
icmp ult [[IPTR_T]] [[IDX2]], 16 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[MAT_ADDR]], align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: ret double [[MATEXT]] + return m[i + 1][j - 1]; +} + +template +using matrix_type = matrix; +struct identmatrix_t { + template + operator matrix_type() const { + matrix_type result; + for (unsigned i = 0; i != N; ++i) + result[i][i] = 1; + return result; + } +}; + +constexpr identmatrix_t identmatrix; + +// CHECK-LABEL: define {{.*}}test_constexpr1 +void test_constexpr1(inout matrix_type m) { + // NOOPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[MAT:%.*]] = load <16 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[IM:%.*]] = call{{.*}} <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix) + // CHECK-NEXT: [[ADD:%.*]] = fadd <16 x float> [[MAT]], [[IM]] + // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align [[ALIGN]]{{$}} + // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: store <16 x float> [[ADD]], ptr [[MAT_ADDR]], align 4 + // CHECK-NEXT: ret void + + // CHECK-LABEL: define{{.*}} <16 x float> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIfLj4EEEv( + // CHECK-LABEL: for.body: ; preds = %for.cond + // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}} + // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I:%.*]] = zext i32 {{.*}} to i64 + // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}} + // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I2:%.*]] = zext i32 {{.*}} to i64 + // CHECK-NEXT: [[IDX1:%.*]] = mul
[[IPTR_T]] [[I2]], 4 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[I]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 16 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT:%.*]] = load <16 x float>, ptr %result, align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <16 x float> [[MATINS]], ptr %result, align 4 + // CHECK-NEXT: br label %for.inc + m = m + identmatrix; +} + +// CHECK-LABEL: define {{.*}}test_constexpr2 +void test_constexpr2(inout matrix_type m) { + // CHECK: [[IM:%.*]] = call{{.*}} <16 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj4EEEv(ptr {{[^,]*}} @_ZL11identmatrix) + // NOOPT: [[MAT:%.*]] = load <16 x i32>, ptr {{.*}}, align 4{{$}} + // OPT: [[MAT:%.*]] = load <16 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SUB:%.*]] = sub <16 x i32> [[IM]], [[MAT]] + // CHECK-NEXT: [[SUB2:%.*]] = add <16 x i32> [[SUB]], splat (i32 1) + // NOOPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align [[ALIGN]]{{$}} + // OPT-NEXT: [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: store <16 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4 + // CHECK-NEXT: ret void + // + + // CHECK-LABEL: define{{.*}} <16 x i32> @_ZNK13identmatrix_tcvu11matrix_typeIXT0_EXT0_ET_EIiLj4EEEv( + // CHECK-LABEL: for.body: ; preds = %for.cond + // NOOPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4{{$}} + // OPT-NEXT: [[I:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I:%.*]] = zext i32 {{.*}} to i64 + // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4{{$}} + // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I2:%.*]] = zext i32 {{.*}} to i64 + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[I2]], 4 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[I]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult 
[[IPTR_T]] [[IDX2]], 16 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT:%.*]] = load <16 x i32>, ptr %result, align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x i32> [[MAT]], i32 1, [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <16 x i32> [[MATINS]], ptr %result, align 4 + // CHECK-NEXT: br label %for.inc + + m = identmatrix - m + 1; +} diff --git a/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type-operators.hlsl b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type-operators.hlsl new file mode 100644 index 0000000000000..be412d23a676f --- /dev/null +++ b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type-operators.hlsl @@ -0,0 +1,1255 @@ +// RUN: %clang_cc1 -O0 -triple spirv-unknown-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV,NOOPT -DIPTR_T=i64 -DALIGN=8 +// RUN: %clang_cc1 -O1 -triple spirv-unknown-vulkan-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV,OPT -DIPTR_T=i64 -DALIGN=8 +// RUN: %clang_cc1 -O0 -triple dxil-pc-shadermodel6.3-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,DXIL,NOOPT -DIPTR_T=i32 -DALIGN=4 +// RUN: %clang_cc1 -O1 -triple dxil-pc-shadermodel6.3-compute -finclude-default-header -fnative-half-type -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,DXIL,OPT -DIPTR_T=i32 -DALIGN=4 + +// Test arithmetic operations on matrix types. +// This is adapted to HLSL from CodeGen/matrix-type-operators.c. + +// Floating point matrix/scalar additions. 
+ +// CHECK-LABEL: define {{.*}}add_matrix_matrix_double +void add_matrix_matrix_double(double4x4 a, double4x4 b, double4x4 c) { + // NOOPT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[C:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[C:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fadd <16 x double> [[B]], [[C]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + + a = b + c; +} + +// CHECK-LABEL: define {{.*}}add_compound_assign_matrix_double +void add_compound_assign_matrix_double(double4x4 a, double4x4 b) { + // NOOPT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fadd <16 x double> [[A]], [[B]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + + a += b; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_assign_matrix_double +void subtract_compound_assign_matrix_double(double4x4 a, double4x4 b) { + // NOOPT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fsub <16 x double> [[A]], [[B]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + + a -= b; +} + +// CHECK-LABEL: define {{.*}}add_matrix_matrix_float +void add_matrix_matrix_float(float2x3 a, float2x3 b, float2x3 c) { + // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // 
NOOPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[C:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[B]], [[C]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + + a = b + c; +} + +// CHECK-LABEL: define {{.*}}add_compound_assign_matrix_float +void add_compound_assign_matrix_float(float2x3 a, float2x3 b) { + // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[A]], [[B]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + + a += b; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_assign_matrix_float +void subtract_compound_assign_matrix_float(float2x3 a, float2x3 b) { + // NOOPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[B:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[A]], [[B]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + + a -= b; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_double_float +void add_matrix_scalar_double_float(double4x4 a, float vf) { + // NOOPT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr {{.*}}, align 4{{$}} + // OPT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr {{.*}}, align 4, !tbaa 
!{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <16 x double> poison, double [[SCALAR_EXT]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <16 x double> [[SCALAR_EMBED]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <16 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + + a = a + vf; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_double_float +void add_compound_matrix_scalar_double_float(double4x4 a, float vf) { + // NOOPT: [[SCALAR:%.*]] = load float, ptr {{.*}}, align 4{{$}} + // OPT: [[SCALAR:%.*]] = load float, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double + // NOOPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <16 x double> poison, double [[SCALAR_EXT]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <16 x double> [[SCALAR_EMBED]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <16 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + + a += vf; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_double_float +void subtract_compound_matrix_scalar_double_float(double4x4 a, float vf) { + // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} + // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = fpext float [[SCALAR]] to double + // NOOPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // 
CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <16 x double> poison, double [[SCALAR_EXT]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <16 x double> [[SCALAR_EMBED]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fsub <16 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + + a -= vf; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_double_double +void add_matrix_scalar_double_double(double4x4 a, double vd) { + // NOOPT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <16 x double> poison, double [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <16 x double> [[SCALAR_EMBED]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <16 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + + a = a + vd; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_double_double +void add_compound_matrix_scalar_double_double(double4x4 a, double vd) { + // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <16 x double> poison, double [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <16 x double> [[SCALAR_EMBED]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] 
= fadd <16 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + a += vd; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_double_double +void subtract_compound_matrix_scalar_double_double(double4x4 a, double vd) { + // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <16 x double> poison, double [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <16 x double> [[SCALAR_EMBED]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fsub <16 x double> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + a -= vd; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_float_float +void add_matrix_scalar_float_float(float2x3 b, float vf) { + // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} + // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + + b = b + vf; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_float_float +void add_compound_matrix_scalar_float_float(float2x3 b, float vf) { + // NOOPT: [[SCALAR:%.*]] = load
float, ptr %vf.addr, align 4{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}} + // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + b += vf; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_float_float +void subtract_compound_matrix_scalar_float_float(float2x3 b, float vf) { + // NOOPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4{{$}} + // OPT: [[SCALAR:%.*]] = load float, ptr %vf.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr %b.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + b -= vf; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_float_double +void add_matrix_scalar_float_double(float2x3 b, double vd) { + // NOOPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // 
CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + + b = b + vd; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_float_double +void add_compound_matrix_scalar_float_double(float2x3 b, double vd) { + // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float + // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fadd <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + b += vd; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_float_double +void subtract_compound_matrix_scalar_float_double(float2x3 b, double vd) { + // NOOPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load double, ptr %vd.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = fptrunc double [[SCALAR]] to float + // NOOPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <6 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = 
insertelement <6 x float> poison, float [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <6 x float> [[SCALAR_EMBED]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fsub <6 x float> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr {{.*}}, align 4 + b -= vd; +} + +// Integer matrix/scalar additions + +// CHECK-LABEL: define {{.*}}add_matrix_matrix_int +void add_matrix_matrix_int(int4x3 a, int4x3 b, int4x3 c) { + // NOOPT: [[B:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[C:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // OPT: [[B:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[C:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[B]], [[C]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr {{.*}}, align 4 + a = b + c; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_matrix_int +void add_compound_matrix_matrix_int(int4x3 a, int4x3 b) { + // NOOPT: [[B:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // OPT: [[B:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[A]], [[B]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr {{.*}}, align 4 + a += b; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_matrix_int +void subtract_compound_matrix_matrix_int(int4x3 a, int4x3 b) { + // NOOPT: [[B:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // OPT: [[B:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = sub <12 x i32> 
[[A]], [[B]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr {{.*}}, align 4 + a -= b; +} + +// CHECK-LABEL: define {{.*}}add_matrix_matrix_uint64 +void add_matrix_matrix_uint64(uint64_t4x2 a, uint64_t4x2 b, uint64_t4x2 c) { + // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[C:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[B]], [[C]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + a = b + c; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_matrix_uint64 +void add_compound_matrix_matrix_uint64(uint64_t4x2 a, uint64_t4x2 b) { + // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[A]], [[B]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + a += b; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_matrix_uint64 +void subtract_compound_matrix_matrix_uint64(uint64_t4x2 a, uint64_t4x2 b) { + // NOOPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT: [[B:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT-NEXT: [[A:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[A]], [[B]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + a -= b; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_int_int16 +void add_matrix_scalar_int_int16(int4x3 a, int16_t vs) { + // NOOPT: [[MATRIX:%.*]] = load <12 x i32>, ptr 
[[MAT_ADDR:%.*]], align 4{{$}} + // NOOPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_EXT]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a = a + vs; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_int_int16 +void add_compound_matrix_scalar_int_int16(int4x3 a, int16_t vs) { + // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} + // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i32 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr %a.addr, align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a += vs; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_int_int16 +void subtract_compound_matrix_scalar_int_int16(int4x3 a, int16_t vs) { + // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} + // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext 
i16 [[SCALAR]] to i32 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr %a.addr, align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_EXT:%.*]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sub <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a -= vs; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_int_int64 +void add_matrix_scalar_int_int64(int4x3 a, int64_t vli) { + // NOOPT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} + // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a = a + vli; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_int_int64 +void add_compound_matrix_scalar_int_int64(int4x3 a, int64_t vli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr %a.addr, align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr 
%a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a += vli; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_int_int64 +void subtract_compound_matrix_scalar_int_int64(int4x3 a, int64_t vli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr %a.addr, align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sub <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a -= vli; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_int_uint64 +void add_matrix_scalar_int_uint64(int4x3 a, uint64_t vulli) { + // NOOPT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} + // NOOPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} + // OPT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_TRUNC]], i64 
0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a = a + vulli; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_int_uint64 +void add_compound_matrix_scalar_int_uint64(int4x3 a, uint64_t vulli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> [[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MAT_ADDR]], align 4 + + a += vulli; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_int_uint64 +void subtract_compound_matrix_scalar_int_uint64(int4x3 a, uint64_t vulli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_TRUNC:%.*]] = trunc i64 [[SCALAR]] to i32 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <12 x i32>, ptr [[MATRIX_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <12 x i32> poison, i32 [[SCALAR_TRUNC]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <12 x i32> 
[[SCALAR_EMBED]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sub <12 x i32> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[MATRIX_ADDR]], align 4 + + a -= vulli; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_uint64_short +void add_matrix_scalar_uint64_short(uint64_t4x2 b, int16_t vs) { + // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} + // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + b = vs + b; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_uint64_short +void add_compound_matrix_scalar_uint64_short(uint64_t4x2 b, int16_t vs) { + // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} + // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr 
{{.*}}, align 8 + + b += vs; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_uint64_short +void subtract_compound_matrix_scalar_uint64_short(uint64_t4x2 b, int16_t vs) { + // NOOPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2{{$}} + // OPT: [[SCALAR:%.*]] = load i16, ptr %vs.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EXT:%.*]] = sext i16 [[SCALAR]] to i64 + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR_EXT]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + b -= vs; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_uint64_int +void add_matrix_scalar_uint64_int(uint64_t4x2 b, int64_t vli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + b = vli + b; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_uint64_int +void add_compound_matrix_scalar_uint64_int(uint64_t4x2 b, int64_t vli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} + // NOOPT-NEXT: 
[[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + b += vli; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_uint64_int +void subtract_compound_matrix_scalar_uint64_int(uint64_t4x2 b, int64_t vli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + b -= vli; +} + +// CHECK-LABEL: define {{.*}}add_matrix_scalar_uint64_uint64 +void add_matrix_scalar_uint64_uint64(uint64_t4x2 b, uint64_t vulli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 
[[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[SCALAR_EMBED1]], [[MATRIX]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + b = vulli + b; +} + +// CHECK-LABEL: define {{.*}}add_compound_matrix_scalar_uint64_uint64 +void add_compound_matrix_scalar_uint64_uint64(uint64_t4x2 b, uint64_t vulli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = add <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 8 + + b += vulli; +} + +// CHECK-LABEL: define {{.*}}subtract_compound_matrix_scalar_uint64_uint64 +void subtract_compound_matrix_scalar_uint64_uint64(uint64_t4x2 b, uint64_t vulli) { + // NOOPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8{{$}} + // NOOPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8{{$}} + // OPT: [[SCALAR:%.*]] = load i64, ptr %vulli.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[MATRIX:%.*]] = load <8 x i64>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[SCALAR_EMBED:%.*]] = insertelement <8 x i64> poison, i64 [[SCALAR]], i64 0 + // CHECK-NEXT: [[SCALAR_EMBED1:%.*]] = shufflevector <8 x i64> [[SCALAR_EMBED]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sub <8 x i64> [[MATRIX]], [[SCALAR_EMBED1]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr {{.*}}, align 
8 + + b -= vulli; +} + +// Tests for matrix multiplication. + +// CHECK-LABEL: define {{.*}}multiply_matrix_matrix_double +void multiply_matrix_matrix_double(double4x4 b, double4x4 c) { + // NOOPT: [[B:%.*]] = load <16 x double>, ptr %b.addr, align 8{{$}} + // NOOPT-NEXT: [[C:%.*]] = load <16 x double>, ptr %c.addr, align 8{{$}} + // OPT: [[B:%.*]] = load <16 x double>, ptr %b.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[C:%.*]] = load <16 x double>, ptr %c.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = call <16 x double> @llvm.matrix.multiply.v16f64.v16f64.v16f64(<16 x double> [[B]], <16 x double> [[C]], i32 4, i32 4, i32 4) + // CHECK-NEXT: store <16 x double> [[RES]], ptr %a, align 8 + // OPT-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %a) + // CHECK-NEXT: ret void + + double4x4 a; + a = b * c; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_matrix_matrix_double +void multiply_compound_matrix_matrix_double(double4x4 b, double4x4 c) { + // NOOPT: [[C:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[C:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[B:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = call <16 x double> @llvm.matrix.multiply.v16f64.v16f64.v16f64(<16 x double> [[B]], <16 x double> [[C]], i32 4, i32 4, i32 4) + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + b *= c; +} + +// CHECK-LABEL: define {{.*}}multiply_matrix_matrix_int +void multiply_matrix_matrix_int(int4x4 b, int4x4 c) { + // NOOPT: [[B:%.*]] = load <16 x i32>, ptr {{.*}}, align 4{{$}} + // NOOPT-NEXT: [[C:%.*]] = load <16 x i32>, ptr {{.*}}, align 4{{$}} + // OPT: [[B:%.*]] = load <16 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[C:%.*]] = load <16 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + 
// CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.matrix.multiply.v16i32.v16i32.v16i32(<16 x i32> [[B]], <16 x i32> [[C]], i32 4, i32 4, i32 4) + // CHECK-NEXT: store <16 x i32> [[RES]], ptr %a, align 4 + // OPT-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %a) + // CHECK: ret void + int4x4 a; + a = b * c; +} + +// CHECK-LABEL: define {{.*}}multiply_double_matrix_scalar_float +void multiply_double_matrix_scalar_float(double4x4 a, float s) { + // NOOPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} + // OPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <16 x double> poison, double [[S_EXT]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <16 x double> [[VECINSERT]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fmul <16 x double> [[A]], [[VECSPLAT]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + a = a * s; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_double_matrix_scalar_float +void multiply_compound_double_matrix_scalar_float(double4x4 a, float s) { + // NOOPT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} + // OPT: [[S:%.*]] = load float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double + // NOOPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <16 x double> poison, double [[S_EXT]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <16 x double> [[VECINSERT]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fmul 
<16 x double> [[A]], [[VECSPLAT]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + a *= s; +} + +// CHECK-LABEL: define {{.*}}multiply_double_matrix_scalar_double +void multiply_double_matrix_scalar_double(double4x4 a, double s) { + // NOOPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} + // OPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <16 x double> poison, double [[S]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <16 x double> [[VECINSERT]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fmul <16 x double> [[A]], [[VECSPLAT]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + a = a * s; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_double_matrix_scalar_double +void multiply_compound_double_matrix_scalar_double(double4x4 a, double s) { + // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} + // NOOPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <16 x double> poison, double [[S]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <16 x double> [[VECINSERT]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fmul <16 x double> [[A]], [[VECSPLAT]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + a *= s; +} + +// CHECK-LABEL: define {{.*}}multiply_float_matrix_scalar_double +void multiply_float_matrix_scalar_double(float2x3 b, double s) { + // NOOPT: [[S:%.*]] = 
load double, ptr %s.addr, align 8{{$}} + // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float + // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[VECSPLAT]], [[MAT]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b = s * b; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_float_matrix_scalar_double +void multiply_compound_float_matrix_scalar_double(float2x3 b, double s) { + // NOOPT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} + // OPT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float + // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b *= s; +} + +// CHECK-LABEL: define {{.*}}multiply_int_matrix_scalar_int16 +void multiply_int_matrix_scalar_int16(int4x3 b, int16_t s) { + // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} + // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 + // NOOPT-NEXT: 
[[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <12 x i32> poison, i32 [[S_EXT]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <12 x i32> [[VECINSERT]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = mul <12 x i32> [[VECSPLAT]], [[MAT]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b = s * b; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_int_matrix_scalar_int16 +void multiply_compound_int_matrix_scalar_int16(int4x3 b, int16_t s) { + // NOOPT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} + // OPT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 + // NOOPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <12 x i32> poison, i32 [[S_EXT]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <12 x i32> [[VECINSERT]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = mul <12 x i32> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b *= s; +} + +// CHECK-LABEL: define {{.*}}multiply_int_matrix_scalar_ull +void multiply_int_matrix_scalar_ull(int4x3 b, uint64_t s) { + // NOOPT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4{{$}} + // OPT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} + // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <12 x i32> poison, i32 [[S_TRUNC]], 
i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <12 x i32> [[VECINSERT]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = mul <12 x i32> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b = b * s; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_int_matrix_scalar_ull +void multiply_compound_int_matrix_scalar_ull(int4x3 b, uint64_t s) { + // NOOPT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} + // OPT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 + // NOOPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <12 x i32> poison, i32 [[S_TRUNC]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <12 x i32> [[VECINSERT]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = mul <12 x i32> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + + b *= s; +} + +// CHECK-LABEL: define {{.*}}multiply_float_matrix_constant +void multiply_float_matrix_constant(float2x3 a) { + // CHECK: [[A_ADDR:%.*]] = alloca [6 x float], align 4 + // CHECK-NEXT: store <6 x float> %a, ptr [[A_ADDR]], align 4 + // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) + // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 + // CHECK-NEXT: ret void + a = a * 2.5; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_float_matrix_constant +void multiply_compound_float_matrix_constant(float2x3 a) { + // CHECK: [[A_ADDR:%.*]] = alloca [6 x float], align 4 + // 
CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 + // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00) + // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 + // CHECK-NEXT: ret void + a *= 2.5; +} + +// CHECK-LABEL: define {{.*}}multiply_int_matrix_constant +void multiply_int_matrix_constant(int4x3 a) { + // CHECK: [[A_ADDR:%.*]] = alloca [12 x i32], align 4 + // CHECK-NEXT: store <12 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 + // NOOPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[A_ADDR]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = mul <12 x i32> splat (i32 5), [[MAT]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[A_ADDR]], align 4 + // CHECK-NEXT: ret void + a = 5 * a; +} + +// CHECK-LABEL: define {{.*}}multiply_compound_int_matrix_constant +void multiply_compound_int_matrix_constant(int4x3 a) { + // CHECK: [[A_ADDR:%.*]] = alloca [12 x i32], align 4 + // CHECK-NEXT: store <12 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4 + // NOOPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[A_ADDR]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = mul <12 x i32> [[MAT]], splat (i32 5) + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[A_ADDR]], align 4 + // CHECK-NEXT: ret void + a *= 5; +} + +// CHECK-LABEL: define {{.*}}divide_double_matrix_scalar_float +void divide_double_matrix_scalar_float(double4x4 a, float s) { + // NOOPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[S:%.*]] = load float, ptr %s.addr, align 4{{$}} + // OPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load 
float, ptr %s.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_EXT:%.*]] = fpext float [[S]] to double + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <16 x double> poison, double [[S_EXT]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <16 x double> [[VECINSERT]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fdiv <16 x double> [[A]], [[VECSPLAT]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + a = a / s; +} + +// CHECK-LABEL: define {{.*}}divide_double_matrix_scalar_double +void divide_double_matrix_scalar_double(double4x4 a, double s) { + // NOOPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} + // OPT: [[A:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <16 x double> poison, double [[S]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <16 x double> [[VECINSERT]], <16 x double> poison, <16 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fdiv <16 x double> [[A]], [[VECSPLAT]] + // CHECK-NEXT: store <16 x double> [[RES]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + a = a / s; +} + +// CHECK-LABEL: define {{.*}}divide_float_matrix_scalar_double +void divide_float_matrix_scalar_double(float2x3 b, double s) { + // NOOPT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4{{$}} + // NOOPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8{{$}} + // OPT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load double, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_TRUNC:%.*]] = fptrunc double [[S]] to float + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <6 x float> poison, float [[S_TRUNC]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = 
shufflevector <6 x float> [[VECINSERT]], <6 x float> poison, <6 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <6 x float> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b = b / s; +} + +// CHECK-LABEL: define {{.*}}divide_int_matrix_scalar_int16 +void divide_int_matrix_scalar_int16(int4x3 b, int16_t s) { + // NOOPT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4{{$}} + // NOOPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2{{$}} + // OPT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load i16, ptr %s.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_EXT:%.*]] = sext i16 [[S]] to i32 + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <12 x i32> poison, i32 [[S_EXT]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <12 x i32> [[VECINSERT]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sdiv <12 x i32> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b = b / s; +} + +// CHECK-LABEL: define {{.*}}divide_int_matrix_scalar_ull +void divide_int_matrix_scalar_ull(int4x3 b, uint64_t s) { + // NOOPT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4{{$}} + // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} + // OPT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[S_TRUNC:%.*]] = trunc i64 [[S]] to i32 + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <12 x i32> poison, i32 [[S_TRUNC]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <12 x i32> [[VECINSERT]], <12 x i32> poison, <12 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = sdiv <12 x i32> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <12 x i32> [[RES]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + b = b / 
s; +} + +// CHECK-LABEL: define {{.*}}divide_ull_matrix_scalar_ull +void divide_ull_matrix_scalar_ull(uint64_t4x2 b, uint64_t s) { + // NOOPT: [[MAT:%.*]] = load <8 x i64>, ptr [[B:%.*]], align 8{{$}} + // NOOPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8{{$}} + // OPT: [[MAT:%.*]] = load <8 x i64>, ptr [[B:%.*]], align 8, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[S:%.*]] = load i64, ptr %s.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[VECINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[S]], i64 0 + // CHECK-NEXT: [[VECSPLAT:%.*]] = shufflevector <8 x i64> [[VECINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: [[RES:%.*]] = udiv <8 x i64> [[MAT]], [[VECSPLAT]] + // CHECK-NEXT: store <8 x i64> [[RES]], ptr [[B]], align 8 + // CHECK-NEXT: ret void + b = b / s; +} + +// CHECK-LABEL: define {{.*}}divide_float_matrix_constant +void divide_float_matrix_constant(float2x3 a) { + // CHECK: [[A_ADDR:%.*]] = alloca [6 x float], align 4 + // CHECK-NEXT: store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4 + // NOOPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00) + // CHECK-NEXT: store <6 x float> [[RES]], ptr [[A_ADDR]], align 4 + // CHECK-NEXT: ret void + a = a / 2.5; +} + + // Tests for the matrix type operators. + + // Check that we can use matrix index expression on different floating point + // matrixes and indices. 
+// CHECK-LABEL: define {{.*}}insert_double_matrix_const_idx_ll_u_double +void insert_double_matrix_const_idx_ll_u_double(double4x4 a, double d, float2x3 b, float e, int j, uint k) { + // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}} + // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x double> [[MAT]], double [[D]], [[IPTR_T]] 4 + // CHECK-NEXT: store <16 x double> [[MATINS]], ptr {{.*}}, align 8 + // CHECK-NEXT: ret void + + a[0ll][1u] = d; +} + +// CHECK-LABEL: define {{.*}}insert_double_matrix_const_idx_i_u_double +void insert_double_matrix_const_idx_i_u_double(double4x4 a, double d) { + // NOOPT: [[D:%.*]] = load double, ptr %d.addr, align 8{{$}} + // OPT: [[D:%.*]] = load double, ptr %d.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MAT:%.*]] = load <16 x double>, ptr [[B:%.*]], align 8{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <16 x double> [[MAT]], double [[D]], [[IPTR_T]] 13 + // CHECK-NEXT: store <16 x double> [[MATINS]], ptr [[B]], align 8 + // CHECK-NEXT: ret void + + a[1][3u] = d; +} + +// CHECK-LABEL: define {{.*}}insert_float_matrix_const_idx_ull_i_float +void insert_float_matrix_const_idx_ull_i_float(float2x3 b, float e) { + // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} + // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], [[IPTR_T]] 3 + // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + + b[1ull][1] = e; +} + +// CHECK-LABEL: define {{.*}}insert_float_matrix_idx_i_u_float +void insert_float_matrix_idx_i_u_float(float2x3 b, float e, int j, uint k) { + // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} + // NOOPT-NEXT: 
[[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[J:%.*]] = sext i32 %{{.*}} to i64 + // NOOPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4{{$}} + // OPT-NEXT: [[K:%.*]] = load i32, ptr %k.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[K:%.*]] = zext i32 %{{.*}} to i64 + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[K]], 2 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[J]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 6 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + + b[j][k] = e; +} + +// CHECK-LABEL: define {{.*}}insert_float_matrix_idx_s_ull_float +void insert_float_matrix_idx_s_ull_float(float2x3 b, float e, int16_t j, uint64_t k) { + // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} + // NOOPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2{{$}} + // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[J:%.*]] = load i16, ptr %j.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[J:%.*]] = sext i16 %{{.*}} to [[IPTR_T]] + // NOOPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8{{$}} + // OPT-NEXT: [[K:%.*]] = load i64, ptr %k.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // DXIL-NEXT: [[K:%.*]] = trunc i64 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[K]], 2 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[J]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 6 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr [[B:%.*]], align 
4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <6 x float> [[MATINS]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + + (b)[j][k] = e; +} + + // Check that we can use matrix index expressions on integer matrixes. +// CHECK-LABEL: define {{.*}}insert_int_idx_expr +void insert_int_idx_expr(int4x3 a, int i) { + // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 4, [[I2]] + // SPIRV-NEXT: [[ADD:%.*]] = sext i32 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] 8, [[ADD]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 12 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[B:%.*]], align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <12 x i32> [[MAT]], i32 [[I1]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <12 x i32> [[MATINS]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + + a[4 + i][1 + 1u] = i; +} + +// Check that we can use matrix index expressions on FP and integer +// matrixes. 
+// CHECK-LABEL: define {{.*}}insert_float_into_int_matrix +void insert_float_into_int_matrix(inout int4x3 a, int i) { + // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // NOOPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align [[ALIGN]]{{$}} + // OPT-NEXT: [[MAT_ADDR1:%.*]] = load ptr, ptr %a.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr [[MAT_ADDR:%.*]], align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <12 x i32> [[MAT]], i32 [[I]], [[IPTR_T]] 7 + // CHECK-NEXT: store <12 x i32> [[MATINS]], ptr [[MAT_ADDR]], align 4 + // CHECK-NEXT: ret void + + a[3][1] = i; +} + +// Check that we can use overloaded matrix index expressions on matrixes with +// matching dimensions, but different element types. +// CHECK-LABEL: define {{.*}}insert_matching_dimensions1 +void insert_matching_dimensions1(double3x3 a, double i) { + // NOOPT: [[I:%.*]] = load double, ptr %i.addr, align 8{{$}} + // OPT: [[I:%.*]] = load double, ptr %i.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MAT:%.*]] = load <9 x double>, ptr [[B:%.*]], align 8{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x double> [[MAT]], double [[I]], [[IPTR_T]] 5 + // CHECK-NEXT: store <9 x double> [[MATINS]], ptr [[B]], align 8 + // CHECK-NEXT: ret void + + a[2u][1u] = i; +} + +// CHECK-LABEL: define {{.*}}insert_matching_dimensions +void insert_matching_dimensions(float3x3 b, float e) { + // NOOPT: [[E:%.*]] = load float, ptr %e.addr, align 4{{$}} + // OPT: [[E:%.*]] = load float, ptr %e.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[B:%.*]], align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT]], float [[E]], [[IPTR_T]] 7 + // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[B]], align 4 + // CHECK-NEXT: ret void + + b[1u][2u] = e; +} + +// CHECK-LABEL: define 
{{.*}}extract_double +double extract_double(double4x4 a) { + // NOOPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8{{$}} + // OPT: [[MAT:%.*]] = load <16 x double>, ptr {{.*}}, align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], [[IPTR_T]] 10 + // CHECK-NEXT: ret double [[MATEXT]] + + return a[2][3 - 1u]; +} + +// CHECK-LABEL: define {{.*}}extract_float +double extract_float(float3x3 b) { + // NOOPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4{{$}} + // OPT: [[MAT:%.*]] = load <9 x float>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], [[IPTR_T]] 5 + // CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[MATEXT]] to double + // CHECK-NEXT: ret double [[TO_DOUBLE]] + + return b[2][1]; +} + +// CHECK-LABEL: define {{.*}}extract_int +int extract_int(int4x3 c, uint64_t j) { + // NOOPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8{{$}} + // OPT: [[J1:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // DXIL-NEXT: [[J1:%.*]] = trunc i64 %{{.*}} to [[IPTR_T]] + // NOOPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8{{$}} + // OPT-NEXT: [[J2:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // DXIL-NEXT: [[J2:%.*]] = trunc i64 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[J2]], 4 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[J1]] + // NOOPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 12 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // OPT-NEXT: [[MAT:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <12 x i32> [[MAT]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: ret i32 [[MATEXT]] + + return c[j][j]; +} + +// CHECK-LABEL: define {{.*}}test_extract_matrix_pointer1 +double test_extract_matrix_pointer1(inout double3x2 ptr, uint j) { + // 
NOOPT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[J:%.*]] = zext i32 {{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX:%.*]] = add [[IPTR_T]] 3, [[J]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX]], 6 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // NOOPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align [[ALIGN]]{{$}} + // OPT-NEXT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR]], align 8{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR]], align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], [[IPTR_T]] [[IDX]] + // CHECK-NEXT: ret double [[MATEXT]] + + return ptr[j][1]; +} + +// CHECK-LABEL: define {{.*}}test_extract_matrix_pointer2 +double test_extract_matrix_pointer2(inout double3x2 ptr) { + // NOOPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align [[ALIGN]]{{$}} + // OPT: [[PTR:%.*]] = load ptr, ptr %ptr.addr, align [[ALIGN]], !tbaa !{{[0-9]+}}{{$}} + // NOOPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR]], align 8{{$}} + // OPT-NEXT: [[MAT:%.*]] = load <6 x double>, ptr [[PTR]], align 8, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], [[IPTR_T]] 5 + // CHECK-NEXT: ret double [[MATEXT]] + + return ptr[2][1 * 3 - 2]; +} + +// CHECK-LABEL: define {{.*}}insert_extract +void insert_extract(double4x4 a, float3x3 b, uint64_t j, int16_t k) { + // NOOPT: [[K:%.*]] = load i16, ptr %k.addr, align 2{{$}} + // OPT: [[K:%.*]] = load i16, ptr %k.addr, align 2, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to [[IPTR_T]] + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[K_EXT]], 3 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], 0 + // NOOPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4{{$}} + // OPT-NEXT: 
[[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 9 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // OPT-NEXT: [[MAT:%.*]] = load <9 x float>, ptr [[MAT_ADDR:%.*]], align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], [[IPTR_T]] [[IDX2]] + // NOOPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8{{$}} + // OPT-NEXT: [[J:%.*]] = load i64, ptr %j.addr, align 8, !tbaa !{{[0-9]+}}{{$}} + // DXIL-NEXT: [[J:%.*]] = trunc i64 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX3:%.*]] = mul [[IPTR_T]] [[J]], 3 + // CHECK-NEXT: [[IDX4:%.*]] = add [[IPTR_T]] [[IDX3]], 2 + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX4]], 9 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, ptr [[MAT_ADDR]], align 4{{$}} + // CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], [[IPTR_T]] [[IDX4]] + // CHECK-NEXT: store <9 x float> [[MATINS]], ptr [[MAT_ADDR]], align 4 + // CHECK-NEXT: ret void + + b[2][j] = b[0][k]; +} + +// CHECK-LABEL: define {{.*}}insert_compound_stmt +void insert_compound_stmt(double4x4 a) { + // CHECK: [[A:%.*]] = load <16 x double>, ptr [[A_PTR:%.*]], align 8{{$}} + // CHECK-NEXT: [[EXT:%.*]] = extractelement <16 x double> [[A]], [[IPTR_T]] 14 + // CHECK-NEXT: [[SUB:%.*]] = fsub double [[EXT]], 1.000000e+00 + // CHECK-NEXT: [[A2:%.*]] = load <16 x double>, ptr [[A_PTR]], align 8{{$}} + // CHECK-NEXT: [[INS:%.*]] = insertelement <16 x double> [[A2]], double [[SUB]], [[IPTR_T]] 14 + // CHECK-NEXT: store <16 x double> [[INS]], ptr [[A_PTR]], align 8 + // CHECK-NEXT: ret void + + a[2][3] -= 1.0; +} + +struct Foo { + float2x3 mat; +}; + +// CHECK-LABEL: define {{.*}}insert_compound_stmt_field +void insert_compound_stmt_field(inout struct Foo a, float f, uint i, uint j) { + // NOOPT: [[I:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // OPT: [[I:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I:%.*]] = zext i32 
%{{.*}} to [[IPTR_T]] + // NOOPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT-NEXT: [[J:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[J:%.*]] = zext i32 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX1:%.*]] = mul [[IPTR_T]] [[J]], 2 + // CHECK-NEXT: [[IDX2:%.*]] = add [[IPTR_T]] [[IDX1]], [[I]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 6 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + + // CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, ptr %mat, align 4{{$}} + // CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}} + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2]], 6 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, ptr %mat, align 4{{$}} + // CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], [[IPTR_T]] [[IDX2]] + // CHECK-NEXT: store <6 x float> [[INS]], ptr %mat, align 4 + // CHECK-NEXT: ret void + + a.mat[i][j] += f; +} + +// CHECK-LABEL: define {{.*}}matrix_as_idx +void matrix_as_idx(int4x3 a, int i, int j, double4x4 b) { + // NOOPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // OPT: [[I1:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I1:%.*]] = sext i32 %{{.*}} to [[IPTR_T]] + // NOOPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT-NEXT: [[J1:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[J1:%.*]] = sext i32 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX1_1:%.*]] = mul [[IPTR_T]] [[J1]], 4 + // CHECK-NEXT: [[IDX1_2:%.*]] = add [[IPTR_T]] [[IDX1_1]], [[I1]] + // NOOPT-NEXT: [[A:%.*]] = load <12 x i32>, ptr %a.addr, align 4{{$}} + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX1_2]], 12 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // OPT-NEXT: [[A:%.*]] = load <12 x i32>, ptr %a.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // 
CHECK-NEXT: [[MI1:%.*]] = extractelement <12 x i32> [[A]], [[IPTR_T]] [[IDX1_2]] + // SPIRV-NEXT: [[MI1:%.*]] = sext i32 %{{.*}} to [[IPTR_T]] + // NOOPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4{{$}} + // OPT-NEXT: [[J2:%.*]] = load i32, ptr %j.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[J2:%.*]] = sext i32 %{{.*}} to [[IPTR_T]] + // NOOPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4{{$}} + // OPT-NEXT: [[I2:%.*]] = load i32, ptr %i.addr, align 4, !tbaa !{{[0-9]+}}{{$}} + // SPIRV-NEXT: [[I2:%.*]] = sext i32 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX2_1:%.*]] = mul [[IPTR_T]] [[I2]], 4 + // CHECK-NEXT: [[IDX2_2:%.*]] = add [[IPTR_T]] [[IDX2_1]], [[J2]] + // NOOPT-NEXT: [[A2:%.*]] = load <12 x i32>, ptr {{.*}}, align 4{{$}} + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX2_2]], 12 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // OPT-NEXT: [[A2:%.*]] = load <12 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}} + // CHECK-NEXT: [[MI2:%.*]] = extractelement <12 x i32> [[A2]], [[IPTR_T]] [[IDX2_2]] + // CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2 + // SPIRV-NEXT: [[MI3:%.*]] = sext i32 %{{.*}} to [[IPTR_T]] + // CHECK-NEXT: [[IDX3_1:%.*]] = mul [[IPTR_T]] [[MI3]], 4 + // CHECK-NEXT: [[IDX3_2:%.*]] = add [[IPTR_T]] [[IDX3_1]], [[MI1]] + // OPT-NEXT: [[CMP:%.*]] = icmp ult [[IPTR_T]] [[IDX3_2]], 16 + // OPT-NEXT: call void @llvm.assume(i1 [[CMP]]) + // CHECK-NEXT: [[B:%.*]] = load <16 x double>, ptr [[B_PTR:%.*]], align 8{{$}} + // CHECK-NEXT: [[INS:%.*]] = insertelement <16 x double> [[B]], double 1.500000e+00, [[IPTR_T]] [[IDX3_2]] + // CHECK-NEXT: store <16 x double> [[INS]], ptr [[B_PTR]], align 8 + // CHECK-NEXT: ret void + + b[a[i][j]][a[j][i] + 2] = 1.5; +} + + diff --git a/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type.hlsl b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type.hlsl new file mode 100644 index 0000000000000..8747d23dcd2f1 --- /dev/null +++ 
b/clang/test/CodeGenHLSL/Types/BuiltinMatrix/matrix-type.hlsl @@ -0,0 +1,219 @@ +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple spirv-unknown-vulkan-compute -fnative-half-type -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,SPIRV +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +// CHECK: %struct.Matrix = type { i16, [12 x float], float } + +// CHECK-LABEL: define {{.*}}load_store_double +void load_store_double(inout double4x4 a, inout double4x4 b) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: [[B_PTR:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: [[B:%.*]] = load <16 x double>, ptr [[B_PTR]], align 8 + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: store <16 x double> [[B]], ptr [[A_PTR]], align 8 + // CHECK-NEXT: ret void + + a = b; +} + +// CHECK-LABEL: define {{.*}}load_store_float +void load_store_float(inout float3x4 a, inout float3x4 b) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: [[B_PTR:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: [[B:%.*]] = load <12 x float>, ptr [[B_PTR]], align 4 + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: store <12 x float> [[B]], ptr [[A_PTR]], align 4 + // CHECK-NEXT: ret void + + a = b; +} + +// CHECK-LABEL: define {{.*}}load_store_int +void 
load_store_int(inout int3x4 a, inout int3x4 b) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: [[B_PTR:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: [[B:%.*]] = load <12 x i32>, ptr [[B_PTR]], align 4 + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: store <12 x i32> [[B]], ptr [[A_PTR]], align 4 + // CHECK-NEXT: ret void + + a = b; +} + +// CHECK-LABEL: define {{.*}}load_store_ull +void load_store_ull(inout uint64_t3x4 a, inout uint64_t3x4 b) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: [[B_PTR:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: [[B:%.*]] = load <12 x i64>, ptr [[B_PTR]], align 8 + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: store <12 x i64> [[B]], ptr [[A_PTR]], align 8 + // CHECK-NEXT: ret void + + a = b; +} + +// CHECK-LABEL: define {{.*}}load_store_fp16 +void load_store_fp16(inout float16_t3x4 a, inout float16_t3x4 b) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: [[B_PTR:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: [[B:%.*]] = load <12 x half>, ptr [[B_PTR]], align 2 + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: store <12 x half> [[B]], ptr [[A_PTR]], align 2 + // 
CHECK-NEXT: ret void + + a = b; +} + + +typedef struct { + uint16_t Tmp1; + float3x4 Data; + float Tmp2; +} Matrix; + +// CHECK-LABEL: define {{.*}}matrix_struct +void matrix_struct(Matrix a, Matrix b) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %Data = getelementptr inbounds nuw %struct.Matrix, ptr %a, i32 0, i32 1 + // CHECK-NEXT: [[tmp:%[0-9]*]] = load <12 x float>, ptr %Data, align 4 + // CHECK-NEXT: %Data1 = getelementptr inbounds nuw %struct.Matrix, ptr %b, i32 0, i32 1 + // CHECK-NEXT: store <12 x float> [[tmp]], ptr %Data1, align 4 + // CHECK-NEXT: ret void + b.Data = a.Data; +} + +// CHECK-LABEL: define {{.*}}parameter_passing +void parameter_passing(in float3x3 a, inout float3x3 b, out float3x3 c) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca [9 x float], align 4 + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: %c.addr = alloca ptr, align + // CHECK-NEXT: store <9 x float> %a, ptr %a.addr, align 4 + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: store ptr %c, ptr %c.addr, align + // CHECK-NEXT: [[A:%.*]] = load <9 x float>, ptr %a.addr, align 4 + // CHECK-NEXT: [[B:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: store <9 x float> [[A]], ptr [[B]], align 4 + // CHECK-NEXT: [[C:%.*]] = load ptr, ptr %c.addr, align + // CHECK-NEXT: store <9 x float> [[A]], ptr [[C]], align 4 + // CHECK-NEXT: ret void + c = b = a; +} + +// CHECK-LABEL: define {{.*}}return_matrix +float3x3 return_matrix(inout float3x3 a) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: [[A:%.*]] = load <9 x float>, ptr [[A_PTR]], align 4 + // CHECK-NEXT: ret <9 x float> [[A]] + return 
a; +} + + +class MatrixClass { + int Tmp1; + float3x4 Data; + int64_t Tmp2; +}; + +// CHECK-LABEL: define {{.*}}matrix_class_reference +void matrix_class_reference(inout MatrixClass a, inout MatrixClass b) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: %Data = getelementptr inbounds nuw %class.MatrixClass, ptr [[A_PTR]], i32 0, i32 1 + // CHECK-NEXT: [[DATA:%.*]] = load <12 x float>, ptr %Data, align 4 + // CHECK-NEXT: [[B_PTR:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: %Data1 = getelementptr inbounds nuw %class.MatrixClass, ptr [[B_PTR]], i32 0, i32 1 + // CHECK-NEXT: store <12 x float> [[DATA]], ptr %Data1, align 4 + // CHECK-NEXT: ret void + b.Data = a.Data; +} + +template +class MatrixClassTemplate { + using MatrixTy = matrix; + int Tmp1; + MatrixTy Data; + int64_t Tmp2; +}; + +template +void matrix_template_reference(inout MatrixClassTemplate a, inout MatrixClassTemplate b) { + b.Data = a.Data; +} + +// CHECK-LABEL: define {{.*}}matrix_template_reference_caller +MatrixClassTemplate matrix_template_reference_caller(matrix Data) { + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %Data.addr = alloca [12 x float], align 4 + // CHECK-NEXT: %Arg = alloca %class.MatrixClassTemplate, align 8 + // CHECK-NEXT: %tmp = alloca %class.MatrixClassTemplate, align 8 + // CHECK-NEXT: %tmp2 = alloca %class.MatrixClassTemplate, align 8 + // CHECK-NEXT: store <12 x float> %Data, ptr %Data.addr, align 4 + // CHECK-NEXT: [[DATA:%.*]] = load <12 x float>, ptr %Data.addr, align 4 + // CHECK-NEXT: %Data1 = getelementptr inbounds nuw %class.MatrixClassTemplate, ptr %Arg, i32 0, i32 1 + // CHECK-NEXT: 
store <12 x float> [[DATA]], ptr %Data1, align 4 + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i{{[0-9]*}}(ptr align 8 %tmp, ptr align 8 %Arg, i{{[0-9]*}} 64, i1 false) + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i{{[0-9]*}}(ptr align 8 %tmp2, ptr align 8 %agg.result, i{{[0-9]*}} 64, i1 false) + // CHECK-NEXT: call{{.*}} void @_Z25matrix_template_referenceIfLj3ELj4EEv19MatrixClassTemplateIT_XT0_EXT1_EES2_(ptr noalias nonnull align 8 dereferenceable(64) %tmp, ptr noalias nonnull align 8 dereferenceable(64) %tmp2) + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i{{[0-9]*}}(ptr align 8 %Arg, ptr align 8 %tmp, i{{[0-9]*}} 64, i1 false) + // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i{{[0-9]*}}(ptr align 8 %agg.result, ptr align 8 %tmp2, i{{[0-9]*}} 64, i1 false) + // CHECK-NEXT: ret void + + // CHECK-LABEL: define{{.*}} void @_Z25matrix_template_referenceIfLj3ELj4EEv19MatrixClassTemplateIT_XT0_EXT1_EES2_(ptr noalias nonnull align 8 dereferenceable(64) %a, ptr noalias nonnull align 8 dereferenceable(64) %b) + // CHECK-NEXT: entry: + // SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry() + // CHECK-NEXT: %a.addr = alloca ptr, align + // CHECK-NEXT: %b.addr = alloca ptr, align + // CHECK-NEXT: store ptr %a, ptr %a.addr, align + // CHECK-NEXT: store ptr %b, ptr %b.addr, align + // CHECK-NEXT: [[A_PTR:%.*]] = load ptr, ptr %a.addr, align + // CHECK-NEXT: %Data = getelementptr inbounds nuw %class.MatrixClassTemplate, ptr [[A_PTR]], i32 0, i32 1 + // CHECK-NEXT: [[DATA:%.*]] = load <12 x float>, ptr %Data, align 4 + // CHECK-NEXT: [[B_PTR:%.*]] = load ptr, ptr %b.addr, align + // CHECK-NEXT: %Data1 = getelementptr inbounds nuw %class.MatrixClassTemplate, ptr [[B_PTR]], i32 0, i32 1 + // CHECK-NEXT: store <12 x float> [[DATA]], ptr %Data1, align 4 + // CHECK-NEXT: ret void + + MatrixClassTemplate Result, Arg; + Arg.Data = Data; + matrix_template_reference(Arg, Result); + return Result; +} + + diff --git a/clang/test/CodeGenHLSL/matrix-types.hlsl 
b/clang/test/CodeGenHLSL/matrix-types.hlsl new file mode 100644 index 0000000000000..721d383cd04f1 --- /dev/null +++ b/clang/test/CodeGenHLSL/matrix-types.hlsl @@ -0,0 +1,348 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - -DNAMESPACED| FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - -DSPIRV| FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - -DSPIRV -DNAMESPACED| FileCheck %s + +#ifdef NAMESPACED +#define TYPE_DECL(T) hlsl::T T##_Val +#else +#define TYPE_DECL(T) T T##_Val +#endif + +// Until MicrosoftCXXABI supports mangling matrices, +// these have to be local variables for DXIL. +#ifndef SPIRV +void f() { +#endif + +// built-in matrix types: + +// Capture target-specific details. 
+//CHECK: [[PFX:[%@]]]int16_t1x1_Val = [[STR:(alloca|global)]] [1 x i16][[ZI:( zeroinitializer)?]], align 2 +//CHECK: [[PFX]]int16_t1x2_Val = [[STR]] [2 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t1x3_Val = [[STR]] [3 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t1x4_Val = [[STR]] [4 x i16][[ZI]], align 2 +TYPE_DECL( int16_t1x1 ); +TYPE_DECL( int16_t1x2 ); +TYPE_DECL( int16_t1x3 ); +TYPE_DECL( int16_t1x4 ); + +//CHECK: [[PFX]]int16_t2x1_Val = [[STR]] [2 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t2x2_Val = [[STR]] [4 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t2x3_Val = [[STR]] [6 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t2x4_Val = [[STR]] [8 x i16][[ZI]], align 2 +TYPE_DECL( int16_t2x1 ); +TYPE_DECL( int16_t2x2 ); +TYPE_DECL( int16_t2x3 ); +TYPE_DECL( int16_t2x4 ); + +//CHECK: [[PFX]]int16_t3x1_Val = [[STR]] [3 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t3x2_Val = [[STR]] [6 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t3x3_Val = [[STR]] [9 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t3x4_Val = [[STR]] [12 x i16][[ZI]], align 2 +TYPE_DECL( int16_t3x1 ); +TYPE_DECL( int16_t3x2 ); +TYPE_DECL( int16_t3x3 ); +TYPE_DECL( int16_t3x4 ); + +//CHECK: [[PFX]]int16_t4x1_Val = [[STR]] [4 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t4x2_Val = [[STR]] [8 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t4x3_Val = [[STR]] [12 x i16][[ZI]], align 2 +//CHECK: [[PFX]]int16_t4x4_Val = [[STR]] [16 x i16][[ZI]], align 2 +TYPE_DECL( int16_t4x1 ); +TYPE_DECL( int16_t4x2 ); +TYPE_DECL( int16_t4x3 ); +TYPE_DECL( int16_t4x4 ); + +//CHECK: [[PFX]]uint16_t1x1_Val = [[STR]] [1 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t1x2_Val = [[STR]] [2 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t1x3_Val = [[STR]] [3 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t1x4_Val = [[STR]] [4 x i16][[ZI]], align 2 +TYPE_DECL( uint16_t1x1 ); +TYPE_DECL( uint16_t1x2 ); +TYPE_DECL( uint16_t1x3 ); +TYPE_DECL( uint16_t1x4 ); + +//CHECK: [[PFX]]uint16_t2x1_Val = [[STR]] [2 x i16][[ZI]], align 2 +//CHECK: 
[[PFX]]uint16_t2x2_Val = [[STR]] [4 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t2x3_Val = [[STR]] [6 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t2x4_Val = [[STR]] [8 x i16][[ZI]], align 2 +TYPE_DECL( uint16_t2x1 ); +TYPE_DECL( uint16_t2x2 ); +TYPE_DECL( uint16_t2x3 ); +TYPE_DECL( uint16_t2x4 ); + +//CHECK: [[PFX]]uint16_t3x1_Val = [[STR]] [3 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t3x2_Val = [[STR]] [6 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t3x3_Val = [[STR]] [9 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t3x4_Val = [[STR]] [12 x i16][[ZI]], align 2 +TYPE_DECL( uint16_t3x1 ); +TYPE_DECL( uint16_t3x2 ); +TYPE_DECL( uint16_t3x3 ); +TYPE_DECL( uint16_t3x4 ); + +//CHECK: [[PFX]]uint16_t4x1_Val = [[STR]] [4 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t4x2_Val = [[STR]] [8 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t4x3_Val = [[STR]] [12 x i16][[ZI]], align 2 +//CHECK: [[PFX]]uint16_t4x4_Val = [[STR]] [16 x i16][[ZI]], align 2 +TYPE_DECL( uint16_t4x1 ); +TYPE_DECL( uint16_t4x2 ); +TYPE_DECL( uint16_t4x3 ); +TYPE_DECL( uint16_t4x4 ); + +//CHECK: [[PFX]]int1x1_Val = [[STR]] [1 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int1x2_Val = [[STR]] [2 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int1x3_Val = [[STR]] [3 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int1x4_Val = [[STR]] [4 x i32][[ZI]], align 4 +TYPE_DECL( int1x1 ); +TYPE_DECL( int1x2 ); +TYPE_DECL( int1x3 ); +TYPE_DECL( int1x4 ); + +//CHECK: [[PFX]]int2x1_Val = [[STR]] [2 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int2x2_Val = [[STR]] [4 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int2x3_Val = [[STR]] [6 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int2x4_Val = [[STR]] [8 x i32][[ZI]], align 4 +TYPE_DECL( int2x1 ); +TYPE_DECL( int2x2 ); +TYPE_DECL( int2x3 ); +TYPE_DECL( int2x4 ); + +//CHECK: [[PFX]]int3x1_Val = [[STR]] [3 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int3x2_Val = [[STR]] [6 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int3x3_Val = [[STR]] [9 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int3x4_Val = [[STR]] [12 x i32][[ZI]], 
align 4 +TYPE_DECL( int3x1 ); +TYPE_DECL( int3x2 ); +TYPE_DECL( int3x3 ); +TYPE_DECL( int3x4 ); + +//CHECK: [[PFX]]int4x1_Val = [[STR]] [4 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int4x2_Val = [[STR]] [8 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int4x3_Val = [[STR]] [12 x i32][[ZI]], align 4 +//CHECK: [[PFX]]int4x4_Val = [[STR]] [16 x i32][[ZI]], align 4 +TYPE_DECL( int4x1 ); +TYPE_DECL( int4x2 ); +TYPE_DECL( int4x3 ); +TYPE_DECL( int4x4 ); + +//CHECK: [[PFX]]uint1x1_Val = [[STR]] [1 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint1x2_Val = [[STR]] [2 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint1x3_Val = [[STR]] [3 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint1x4_Val = [[STR]] [4 x i32][[ZI]], align 4 +TYPE_DECL( uint1x1 ); +TYPE_DECL( uint1x2 ); +TYPE_DECL( uint1x3 ); +TYPE_DECL( uint1x4 ); + +//CHECK: [[PFX]]uint2x1_Val = [[STR]] [2 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint2x2_Val = [[STR]] [4 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint2x3_Val = [[STR]] [6 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint2x4_Val = [[STR]] [8 x i32][[ZI]], align 4 +TYPE_DECL( uint2x1 ); +TYPE_DECL( uint2x2 ); +TYPE_DECL( uint2x3 ); +TYPE_DECL( uint2x4 ); + +//CHECK: [[PFX]]uint3x1_Val = [[STR]] [3 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint3x2_Val = [[STR]] [6 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint3x3_Val = [[STR]] [9 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint3x4_Val = [[STR]] [12 x i32][[ZI]], align 4 +TYPE_DECL( uint3x1 ); +TYPE_DECL( uint3x2 ); +TYPE_DECL( uint3x3 ); +TYPE_DECL( uint3x4 ); + +//CHECK: [[PFX]]uint4x1_Val = [[STR]] [4 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint4x2_Val = [[STR]] [8 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint4x3_Val = [[STR]] [12 x i32][[ZI]], align 4 +//CHECK: [[PFX]]uint4x4_Val = [[STR]] [16 x i32][[ZI]], align 4 +TYPE_DECL( uint4x1 ); +TYPE_DECL( uint4x2 ); +TYPE_DECL( uint4x3 ); +TYPE_DECL( uint4x4 ); + +//CHECK: [[PFX]]int64_t1x1_Val = [[STR]] [1 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t1x2_Val = [[STR]] [2 x i64][[ZI]], align 8 +//CHECK: 
[[PFX]]int64_t1x3_Val = [[STR]] [3 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t1x4_Val = [[STR]] [4 x i64][[ZI]], align 8 +TYPE_DECL( int64_t1x1 ); +TYPE_DECL( int64_t1x2 ); +TYPE_DECL( int64_t1x3 ); +TYPE_DECL( int64_t1x4 ); + +//CHECK: [[PFX]]int64_t2x1_Val = [[STR]] [2 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t2x2_Val = [[STR]] [4 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t2x3_Val = [[STR]] [6 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t2x4_Val = [[STR]] [8 x i64][[ZI]], align 8 +TYPE_DECL( int64_t2x1 ); +TYPE_DECL( int64_t2x2 ); +TYPE_DECL( int64_t2x3 ); +TYPE_DECL( int64_t2x4 ); + +//CHECK: [[PFX]]int64_t3x1_Val = [[STR]] [3 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t3x2_Val = [[STR]] [6 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t3x3_Val = [[STR]] [9 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t3x4_Val = [[STR]] [12 x i64][[ZI]], align 8 +TYPE_DECL( int64_t3x1 ); +TYPE_DECL( int64_t3x2 ); +TYPE_DECL( int64_t3x3 ); +TYPE_DECL( int64_t3x4 ); + +//CHECK: [[PFX]]int64_t4x1_Val = [[STR]] [4 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t4x2_Val = [[STR]] [8 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t4x3_Val = [[STR]] [12 x i64][[ZI]], align 8 +//CHECK: [[PFX]]int64_t4x4_Val = [[STR]] [16 x i64][[ZI]], align 8 +TYPE_DECL( int64_t4x1 ); +TYPE_DECL( int64_t4x2 ); +TYPE_DECL( int64_t4x3 ); +TYPE_DECL( int64_t4x4 ); + +//CHECK: [[PFX]]uint64_t1x1_Val = [[STR]] [1 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t1x2_Val = [[STR]] [2 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t1x3_Val = [[STR]] [3 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t1x4_Val = [[STR]] [4 x i64][[ZI]], align 8 +TYPE_DECL( uint64_t1x1 ); +TYPE_DECL( uint64_t1x2 ); +TYPE_DECL( uint64_t1x3 ); +TYPE_DECL( uint64_t1x4 ); + +//CHECK: [[PFX]]uint64_t2x1_Val = [[STR]] [2 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t2x2_Val = [[STR]] [4 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t2x3_Val = [[STR]] [6 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t2x4_Val = [[STR]] [8 x 
i64][[ZI]], align 8 +TYPE_DECL( uint64_t2x1 ); +TYPE_DECL( uint64_t2x2 ); +TYPE_DECL( uint64_t2x3 ); +TYPE_DECL( uint64_t2x4 ); + +//CHECK: [[PFX]]uint64_t3x1_Val = [[STR]] [3 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t3x2_Val = [[STR]] [6 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t3x3_Val = [[STR]] [9 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t3x4_Val = [[STR]] [12 x i64][[ZI]], align 8 +TYPE_DECL( uint64_t3x1 ); +TYPE_DECL( uint64_t3x2 ); +TYPE_DECL( uint64_t3x3 ); +TYPE_DECL( uint64_t3x4 ); + +//CHECK: [[PFX]]uint64_t4x1_Val = [[STR]] [4 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t4x2_Val = [[STR]] [8 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t4x3_Val = [[STR]] [12 x i64][[ZI]], align 8 +//CHECK: [[PFX]]uint64_t4x4_Val = [[STR]] [16 x i64][[ZI]], align 8 +TYPE_DECL( uint64_t4x1 ); +TYPE_DECL( uint64_t4x2 ); +TYPE_DECL( uint64_t4x3 ); +TYPE_DECL( uint64_t4x4 ); + + +//CHECK: [[PFX]]half1x1_Val = [[STR]] [1 x half][[ZI]], align 2 +//CHECK: [[PFX]]half1x2_Val = [[STR]] [2 x half][[ZI]], align 2 +//CHECK: [[PFX]]half1x3_Val = [[STR]] [3 x half][[ZI]], align 2 +//CHECK: [[PFX]]half1x4_Val = [[STR]] [4 x half][[ZI]], align 2 +TYPE_DECL( half1x1 ); +TYPE_DECL( half1x2 ); +TYPE_DECL( half1x3 ); +TYPE_DECL( half1x4 ); + +//CHECK: [[PFX]]half2x1_Val = [[STR]] [2 x half][[ZI]], align 2 +//CHECK: [[PFX]]half2x2_Val = [[STR]] [4 x half][[ZI]], align 2 +//CHECK: [[PFX]]half2x3_Val = [[STR]] [6 x half][[ZI]], align 2 +//CHECK: [[PFX]]half2x4_Val = [[STR]] [8 x half][[ZI]], align 2 +TYPE_DECL( half2x1 ); +TYPE_DECL( half2x2 ); +TYPE_DECL( half2x3 ); +TYPE_DECL( half2x4 ); + +//CHECK: [[PFX]]half3x1_Val = [[STR]] [3 x half][[ZI]], align 2 +//CHECK: [[PFX]]half3x2_Val = [[STR]] [6 x half][[ZI]], align 2 +//CHECK: [[PFX]]half3x3_Val = [[STR]] [9 x half][[ZI]], align 2 +//CHECK: [[PFX]]half3x4_Val = [[STR]] [12 x half][[ZI]], align 2 +TYPE_DECL( half3x1 ); +TYPE_DECL( half3x2 ); +TYPE_DECL( half3x3 ); +TYPE_DECL( half3x4 ); + +//CHECK: [[PFX]]half4x1_Val = 
[[STR]] [4 x half][[ZI]], align 2 +//CHECK: [[PFX]]half4x2_Val = [[STR]] [8 x half][[ZI]], align 2 +//CHECK: [[PFX]]half4x3_Val = [[STR]] [12 x half][[ZI]], align 2 +//CHECK: [[PFX]]half4x4_Val = [[STR]] [16 x half][[ZI]], align 2 +TYPE_DECL( half4x1 ); +TYPE_DECL( half4x2 ); +TYPE_DECL( half4x3 ); +TYPE_DECL( half4x4 ); + +//CHECK: [[PFX]]float1x1_Val = [[STR]] [1 x float][[ZI]], align 4 +//CHECK: [[PFX]]float1x2_Val = [[STR]] [2 x float][[ZI]], align 4 +//CHECK: [[PFX]]float1x3_Val = [[STR]] [3 x float][[ZI]], align 4 +//CHECK: [[PFX]]float1x4_Val = [[STR]] [4 x float][[ZI]], align 4 +TYPE_DECL( float1x1 ); +TYPE_DECL( float1x2 ); +TYPE_DECL( float1x3 ); +TYPE_DECL( float1x4 ); + +//CHECK: [[PFX]]float2x1_Val = [[STR]] [2 x float][[ZI]], align 4 +//CHECK: [[PFX]]float2x2_Val = [[STR]] [4 x float][[ZI]], align 4 +//CHECK: [[PFX]]float2x3_Val = [[STR]] [6 x float][[ZI]], align 4 +//CHECK: [[PFX]]float2x4_Val = [[STR]] [8 x float][[ZI]], align 4 +TYPE_DECL( float2x1 ); +TYPE_DECL( float2x2 ); +TYPE_DECL( float2x3 ); +TYPE_DECL( float2x4 ); + +//CHECK: [[PFX]]float3x1_Val = [[STR]] [3 x float][[ZI]], align 4 +//CHECK: [[PFX]]float3x2_Val = [[STR]] [6 x float][[ZI]], align 4 +//CHECK: [[PFX]]float3x3_Val = [[STR]] [9 x float][[ZI]], align 4 +//CHECK: [[PFX]]float3x4_Val = [[STR]] [12 x float][[ZI]], align 4 +TYPE_DECL( float3x1 ); +TYPE_DECL( float3x2 ); +TYPE_DECL( float3x3 ); +TYPE_DECL( float3x4 ); + +//CHECK: [[PFX]]float4x1_Val = [[STR]] [4 x float][[ZI]], align 4 +//CHECK: [[PFX]]float4x2_Val = [[STR]] [8 x float][[ZI]], align 4 +//CHECK: [[PFX]]float4x3_Val = [[STR]] [12 x float][[ZI]], align 4 +//CHECK: [[PFX]]float4x4_Val = [[STR]] [16 x float][[ZI]], align 4 +TYPE_DECL( float4x1 ); +TYPE_DECL( float4x2 ); +TYPE_DECL( float4x3 ); +TYPE_DECL( float4x4 ); + +//CHECK: [[PFX]]double1x1_Val = [[STR]] [1 x double][[ZI]], align 8 +//CHECK: [[PFX]]double1x2_Val = [[STR]] [2 x double][[ZI]], align 8 +//CHECK: [[PFX]]double1x3_Val = [[STR]] [3 x double][[ZI]], align 8 
+//CHECK: [[PFX]]double1x4_Val = [[STR]] [4 x double][[ZI]], align 8 +TYPE_DECL( double1x1 ); +TYPE_DECL( double1x2 ); +TYPE_DECL( double1x3 ); +TYPE_DECL( double1x4 ); + +//CHECK: [[PFX]]double2x1_Val = [[STR]] [2 x double][[ZI]], align 8 +//CHECK: [[PFX]]double2x2_Val = [[STR]] [4 x double][[ZI]], align 8 +//CHECK: [[PFX]]double2x3_Val = [[STR]] [6 x double][[ZI]], align 8 +//CHECK: [[PFX]]double2x4_Val = [[STR]] [8 x double][[ZI]], align 8 +TYPE_DECL( double2x1 ); +TYPE_DECL( double2x2 ); +TYPE_DECL( double2x3 ); +TYPE_DECL( double2x4 ); + +//CHECK: [[PFX]]double3x1_Val = [[STR]] [3 x double][[ZI]], align 8 +//CHECK: [[PFX]]double3x2_Val = [[STR]] [6 x double][[ZI]], align 8 +//CHECK: [[PFX]]double3x3_Val = [[STR]] [9 x double][[ZI]], align 8 +//CHECK: [[PFX]]double3x4_Val = [[STR]] [12 x double][[ZI]], align 8 +TYPE_DECL( double3x1 ); +TYPE_DECL( double3x2 ); +TYPE_DECL( double3x3 ); +TYPE_DECL( double3x4 ); + +//CHECK: [[PFX]]double4x1_Val = [[STR]] [4 x double][[ZI]], align 8 +//CHECK: [[PFX]]double4x2_Val = [[STR]] [8 x double][[ZI]], align 8 +//CHECK: [[PFX]]double4x3_Val = [[STR]] [12 x double][[ZI]], align 8 +//CHECK: [[PFX]]double4x4_Val = [[STR]] [16 x double][[ZI]], align 8 +TYPE_DECL( double4x1 ); +TYPE_DECL( double4x2 ); +TYPE_DECL( double4x3 ); +TYPE_DECL( double4x4 ); + +#ifndef SPIRV +} +#endif diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-cast.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-cast.hlsl new file mode 100644 index 0000000000000..03045e978c268 --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-cast.hlsl @@ -0,0 +1,138 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-library -fnative-half-type -finclude-default-header -fsyntax-only %s -verify + +typedef struct test_struct { // expected-note 1+ {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}} + // expected-note-re@-1 1+ {{candidate constructor (the implicit move constructor) not 
viable: no known conversion from '{{[^']*}}' (aka '{{[^']*}}') to 'test_struct' for 1st argument}} + // expected-note-re@-2 1+ {{candidate constructor (the implicit copy constructor) not viable: no known conversion from '{{[^']*}}' (aka '{{[^']*}}') to 'const test_struct' for 1st argument}} +} test_struct; + +void f1(void) { + uint16_t3x3 u16_3x3; + int3x3 i32_3x3; + int16_t3x3 i16_3x3; + int4x4 i32_4x4; + float4x4 f32_4x4; + int i; + float4 v; + test_struct s; + + i32_3x3 = (int3x3)u16_3x3; + i16_3x3 = (int16_t3x3)i32_3x3; + i32_4x4 = (int4x4)i16_3x3; // expected-error {{conversion between matrix types 'int4x4' (aka 'matrix') and 'matrix' of different size is not allowed}} + f32_4x4 = (int4x4)i32_4x4; // expected-error {{assigning to 'matrix' from incompatible type 'matrix'}} + i = (int)i32_4x4; // expected-error {{C-style cast from 'int4x4' (aka 'matrix') to 'int' is not allowed}} + i32_4x4 = (int4x4)i; // expected-error {{C-style cast from 'int' to 'int4x4' (aka 'matrix') is not allowed}} + v = (float4)i32_4x4; // expected-error {{C-style cast from 'int4x4' (aka 'matrix') to 'float4' (aka 'vector') is not allowed}} + i32_4x4 = (int4x4)v; // expected-error {{C-style cast from 'float4' (aka 'vector') to 'int4x4' (aka 'matrix') is not allowed}} + s = (test_struct)i16_3x3; // expected-error {{no matching conversion for C-style cast from 'int16_t3x3' (aka 'matrix') to 'test_struct'}} + i16_3x3 = (int16_t3x3)s; // expected-error {{cannot convert 'test_struct' to 'int16_t3x3' (aka 'matrix') without a conversion operator}} + + i32_4x4 = (int4x4)f32_4x4; +} + +void f2(void) { + float2x2 f32_2x2; + double3x3 f64_3x3; + double2x2 f64_2x2; + int4x4 i32_4x4; + uint4x4 u32_4x4; + uint3x3 u32_3x3; + float f; + + f64_3x3 = (double3x3)f32_2x2; // expected-error {{conversion between matrix types 'double3x3' (aka 'matrix') and 'matrix' of different size is not allowed}} + f64_2x2 = (double2x2)f32_2x2; + + u32_4x4 = (uint4x4)i32_4x4; + i32_4x4 = (int4x4)u32_4x4; + u32_3x3 = 
(uint3x3)i32_4x4; // expected-error {{conversion between matrix types 'uint3x3' (aka 'matrix') and 'matrix' of different size is not allowed}} + f = (float)i32_4x4; // expected-error {{C-style cast from 'int4x4' (aka 'matrix') to 'float' is not allowed}} + i32_4x4 = (int4x4)f; // expected-error {{C-style cast from 'float' to 'int4x4' (aka 'matrix') is not allowed}} +} + +template +using matrix_3_3 = matrix; + +template +using matrix_4_4 = matrix; + +void f3() { + matrix_3_3 u16_3x3; + matrix_3_3 i32_3x3; + matrix_3_3 i16_3x3; + matrix_4_4 i32_4x4; + matrix_4_4 f32_4x4; + int i; + int4 v; + test_struct s; + + i32_3x3 = (matrix_3_3)u16_3x3; + i32_3x3 = u16_3x3; // expected-error {{assigning to 'matrix_3_3' from incompatible type 'matrix_3_3'}} + i16_3x3 = (matrix_3_3)i32_3x3; + i32_4x4 = (matrix_4_4)i16_3x3; // expected-error {{conversion between matrix types 'matrix_4_4' (aka 'matrix') and 'matrix' of different size is not allowed}} + + i = (int)i16_3x3; // expected-error {{C-style cast from 'matrix_3_3' (aka 'matrix') to 'int' is not allowed}} + i32_3x3 = (matrix_3_3)i; // expected-error {{C-style cast from 'int' to 'matrix_3_3' (aka 'matrix') is not allowed}} + + v = (int4)i32_3x3; // expected-error {{C-style cast from 'matrix_3_3' (aka 'matrix') to 'int4' (aka 'vector') is not allowed}} + u16_3x3 = (matrix_3_3)v; // expected-error {{C-style cast from 'int4' (aka 'vector') to 'matrix_3_3' (aka 'matrix') is not allowed}} + s = (test_struct)u16_3x3; // expected-error {{no matching conversion for C-style cast from 'matrix_3_3' (aka 'matrix') to 'test_struct'}} + f32_4x4 = (matrix_4_4)s; // expected-error {{cannot convert 'test_struct' to 'matrix_4_4' (aka 'matrix') without a conversion operator}} +} + +void f4() { + matrix_3_3 u16_3x3; + matrix_3_3 i32_3x3; + matrix_3_3 i16_3x3; + matrix_4_4 i32_4x4; + matrix_4_4 f32_4x4; + int i; + int4 v; + test_struct s; + + i32_3x3 = static_cast>(u16_3x3); + i16_3x3 = static_cast>(i32_3x3); + i32_4x4 = static_cast>(i16_3x3); // 
expected-error {{conversion between matrix types 'matrix_4_4' (aka 'matrix') and 'matrix' of different size is not allowed}} + + i = static_cast(i16_3x3); // expected-error {{static_cast from 'matrix_3_3' (aka 'matrix') to 'int' is not allowed}} + i32_3x3 = static_cast>(i); // expected-error {{static_cast from 'int' to 'matrix_3_3' (aka 'matrix') is not allowed}} + + v = static_cast(i32_3x3); // expected-error {{static_cast from 'matrix_3_3' (aka 'matrix') to 'int4' (aka 'vector') is not allowed}} + i16_3x3 = static_cast>(v); // expected-error {{static_cast from 'int4' (aka 'vector') to 'matrix_3_3' (aka 'matrix') is not allowed}} + + s = static_cast(u16_3x3); // expected-error {{no matching conversion for static_cast from 'matrix_3_3' (aka 'matrix') to 'test_struct'}} + f32_4x4 = static_cast>(s); // expected-error {{cannot convert 'test_struct' to 'matrix_4_4' (aka 'matrix') without a conversion operator}} +} + +void f5() { + matrix_3_3 f32_3x3; + matrix_3_3 f64_3x3; + matrix_4_4 f64_4x4; + matrix_4_4 i32_4x4; + matrix_3_3 u32_3x3; + matrix_4_4 u32_4x4; + float f; + + f64_3x3 = (matrix_3_3)f32_3x3; + f64_4x4 = (matrix_4_4)f32_3x3; // expected-error {{conversion between matrix types 'matrix_4_4' (aka 'matrix') and 'matrix' of different size is not allowed}} + i32_4x4 = (matrix_4_4)f64_4x4; + u32_3x3 = (matrix_4_4)i32_4x4; // expected-error {{assigning to 'matrix<[...], 3, 3>' from incompatible type 'matrix<[...], 4, 4>'}} + u32_4x4 = (matrix_4_4)i32_4x4; + i32_4x4 = (matrix_4_4)u32_4x4; +} + +void f6() { + matrix_3_3 f32_3x3; + matrix_3_3 f64_3x3; + matrix_4_4 f64_4x4; + matrix_4_4 i32_4x4; + matrix_3_3 u32_3x3; + matrix_4_4 u32_4x4; + float f; + + f64_3x3 = static_cast>(f32_3x3); + f64_4x4 = static_cast>(f32_3x3); // expected-error {{conversion between matrix types 'matrix_4_4' (aka 'matrix') and 'matrix' of different size is not allowed}} + + i32_4x4 = static_cast>(f64_4x4); + u32_3x3 = static_cast>(i32_4x4); // expected-error {{assigning to 'matrix<[...], 3, 3>' 
from incompatible type 'matrix<[...], 4, 4>'}} + u32_4x4 = static_cast>(i32_4x4); + i32_4x4 = static_cast>(u32_4x4); +} diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-index-operator-type.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-index-operator-type.hlsl new file mode 100644 index 0000000000000..29640ae01d6fb --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-index-operator-type.hlsl @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -fnative-half-type -finclude-default-header -fsyntax-only %s -verify + +double indexi32(matrix X, int i) { return X[i][0]; } + +double indexu32(matrix X, uint i) { return X[i][0]; } + +double indexi16(matrix X, int16_t i) { return X[i][0]; } + +double indexu16(matrix X, uint16_t i) { return X[i][0]; } + +double indexi64(matrix X, int64_t i) { return X[i][0]; } + +double indexu64(matrix X, uint64_t i) { return X[i][0]; } + +double indexi32c(matrix X, int i) { return X[0][i]; } + +double indexu32c(matrix X, uint i) { return X[0][i]; } + +double indexi16c(matrix X, int16_t i) { return X[0][i]; } + +double indexu16c(matrix X, uint16_t i) { return X[0][i]; } + +double indexi64c(matrix X, int64_t i) { return X[0][i]; } + +double indexu64c(matrix X, uint64_t i) { return X[0][i]; } + +// expected-no-diagnostics diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-transpose.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-transpose.hlsl new file mode 100644 index 0000000000000..4423e7cde4bd5 --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-transpose.hlsl @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -fnative-half-type -finclude-default-header -fsyntax-only %s -verify + +void transpose(float3x4 a, int3x2 b, double3x3 c, int e) { + + a = __builtin_matrix_transpose(b); + // expected-error@-1 {{assigning to 'float3x4' (aka 'matrix') from incompatible type 'matrix'}} + b = __builtin_matrix_transpose(b); + // expected-error@-1 
{{assigning to 'int3x2' (aka 'matrix') from incompatible type 'matrix'}} + __builtin_matrix_transpose(e); + // expected-error@-1 {{1st argument must be a matrix}} + __builtin_matrix_transpose("test"); + // expected-error@-1 {{1st argument must be a matrix}} + + uint3x3 m = __builtin_matrix_transpose(c); + // expected-error@-1 {{cannot initialize a variable of type 'uint3x3' (aka 'matrix') with an rvalue of type 'matrix'}} +} + +template +struct MyMatrix { + using matrix_t = matrix; + + matrix_t value; +}; + +template +typename MyMatrix::matrix_t transpose(inout MyMatrix A) { + uint16_t v1 = __builtin_matrix_transpose(A.value); + // expected-error@-1 {{cannot initialize a variable of type 'uint16_t' (aka 'unsigned short') with an rvalue of type 'matrix'}} + // expected-error@-2 2 {{cannot initialize a variable of type 'uint16_t' (aka 'unsigned short') with an rvalue of type 'matrix'}} + + __builtin_matrix_transpose(A); + // expected-error@-1 3 {{1st argument must be a matrix}} + + return __builtin_matrix_transpose(A.value); + // expected-error@-1 {{cannot initialize return object of type 'typename MyMatrix::matrix_t' (aka 'matrix') with an rvalue of type 'matrix'}} + // expected-error@-2 {{cannot initialize return object of type 'typename MyMatrix::matrix_t' (aka 'matrix') with an rvalue of type 'matrix'}} + // expected-error@-3 {{cannot initialize return object of type 'typename MyMatrix::matrix_t' (aka 'matrix') with an rvalue of type 'matrix'}} +} + +void test_transpose_template() { + MyMatrix Mat1; + MyMatrix Mat2; + Mat1.value = transpose(Mat1); + // expected-note@-1 {{in instantiation of function template specialization 'transpose' requested here}} + + Mat1.value = transpose(Mat2); + // expected-note@-1 {{in instantiation of function template specialization 'transpose' requested here}} + + MyMatrix Mat3; + Mat3.value = transpose(Mat2); + // expected-note@-1 {{in instantiation of function template specialization 'transpose' requested here}} +} + diff --git 
a/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-type-operators.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-type-operators.hlsl new file mode 100644 index 0000000000000..fd62c300857fe --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-type-operators.hlsl @@ -0,0 +1,307 @@ +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -fnative-half-type -finclude-default-header -fsyntax-only %s -verify + +void add(float4x4 a, float3x4 b, float4x3 c) { + a = b + c; + // expected-error@-1 {{invalid operands to binary expression ('float3x4' (aka 'matrix') and 'float4x3' (aka 'matrix'))}} + + b += c; + // expected-error@-1 {{invalid operands to binary expression ('float3x4' (aka 'matrix') and 'float4x3' (aka 'matrix'))}} + + a = b + b; // expected-error {{assigning to 'matrix<[...], 4, [...]>' from incompatible type 'matrix<[...], 3, [...]>'}} + + a = 10 + b; + // expected-error@-1 {{assigning to 'matrix<[...], 4, [...]>' from incompatible type 'matrix<[...], 3, [...]>'}} +} + +void sub(float4x4 a, float3x4 b, float4x3 c) { + a = b - c; + // expected-error@-1 {{invalid operands to binary expression ('float3x4' (aka 'matrix') and 'float4x3' (aka 'matrix'))}} + + b -= c; + // expected-error@-1 {{invalid operands to binary expression ('float3x4' (aka 'matrix') and 'float4x3' (aka 'matrix'))}} + + a = b - b; // expected-error {{assigning to 'matrix<[...], 4, [...]>' from incompatible type 'matrix<[...], 3, [...]>'}} + + a = 10 - b; + // expected-error@-1 {{assigning to 'matrix<[...], 4, [...]>' from incompatible type 'matrix<[...], 3, [...]>'}} + +} + +void matrix_matrix_multiply(float4x4 a, float3x4 b, int4x3 c, int4x4 d, float sf, inout uint16_t p) { + // Check dimension mismatches. 
+ a = a * b; + // expected-error@-1 {{invalid operands to binary expression ('float4x4' (aka 'matrix') and 'float3x4' (aka 'matrix'))}} + a *= b; + // expected-error@-1 {{invalid operands to binary expression ('float4x4' (aka 'matrix') and 'float3x4' (aka 'matrix'))}} + b = a * a; + // expected-error@-1 {{assigning to 'matrix<[...], 3, [...]>' from incompatible type 'matrix<[...], 4, [...]>'}} + + // Check element type mismatches. + a = b * c; + // expected-error@-1 {{invalid operands to binary expression ('float3x4' (aka 'matrix') and 'int4x3' (aka 'matrix'))}} + b *= c; + // expected-error@-1 {{invalid operands to binary expression ('float3x4' (aka 'matrix') and 'int4x3' (aka 'matrix'))}} + d = a * a; + // expected-error@-1 {{assigning to 'matrix' from incompatible type 'matrix'}} + + p = a * a; + // expected-error@-1 {{assigning to 'uint16_t' (aka 'unsigned short') from incompatible type 'float4x4' (aka 'matrix')}} +} + +void mat_scalar_multiply(float4x4 a, float3x4 b, float sf, inout uint16_t p) { + // Shape of multiplication result does not match the type of b. + b = a * sf; + // expected-error@-1 {{assigning to 'matrix<[...], 3, [...]>' from incompatible type 'matrix<[...], 4, [...]>'}} + b = sf * a; + // expected-error@-1 {{assigning to 'matrix<[...], 3, [...]>' from incompatible type 'matrix<[...], 4, [...]>'}} + + sf = a * sf; + // expected-error@-1 {{assigning to 'float' from incompatible type 'float4x4' (aka 'matrix')}} +} + +void mat_scalar_divide(float4x4 a, float3x4 b, float sf, inout uint16_t p) { + // Shape of multiplication result does not match the type of b. 
+ b = a / sf; + // expected-error@-1 {{assigning to 'matrix<[...], 3, [...]>' from incompatible type 'matrix<[...], 4, [...]>'}} + b = sf / a; + // expected-error@-1 {{invalid operands to binary expression ('float' and 'float4x4' (aka 'matrix'))}} + + a = p / a; + // expected-error@-1 {{invalid operands to binary expression ('uint16_t' (aka 'unsigned short') and 'float4x4' (aka 'matrix'))}} + + sf = a / sf; + // expected-error@-1 {{assigning to 'float' from incompatible type 'float4x4' (aka 'matrix')}} +} + +void matrix_matrix_divide(float4x4 a, float3x4 b, int4x3 c, int4x4 d, float sf, uint16_t p) { + // Matrix by matrix division is not supported. + a = a / a; + // expected-error@-1 {{invalid operands to binary expression ('float4x4' (aka 'matrix') and 'float4x4')}} + + b = a / a; + // expected-error@-1 {{invalid operands to binary expression ('float4x4' (aka 'matrix') and 'float4x4')}} + + // Check element type mismatches. + a = b / c; + // expected-error@-1 {{invalid operands to binary expression ('float3x4' (aka 'matrix') and 'int4x3' (aka 'matrix'))}} + d = a / a; + // expected-error@-1 {{invalid operands to binary expression ('float4x4' (aka 'matrix') and 'float4x4')}} + + p = a / a; + // expected-error@-1 {{invalid operands to binary expression ('float4x4' (aka 'matrix') and 'float4x4')}} +} + +float3x4 get_matrix(void); + +void insert(float3x4 a, float f) { + // Non integer indexes. + a[1][f] = 0; + // expected-error@-1 {{matrix column index is not an integer}} + a[f][2] = 0; + // expected-error@-1 {{matrix row index is not an integer}} + a[f][f] = 0; + // expected-error@-1 {{matrix row index is not an integer}} + // expected-error@-2 {{matrix column index is not an integer}} + a[0][f] = 0; + // expected-error@-1 {{matrix column index is not an integer}} + + a[f][f] = 0; + // expected-error@-1 {{matrix row index is not an integer}} + // expected-error@-2 {{matrix column index is not an integer}} + + // Indexes outside allowed dimensions. 
+ a[-1][3] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + a[2][-1] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 4)}} + a[2][-1u] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 4)}} + a[-1u][3] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + a[5][2] = 10.0; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + a[2][10] = 10.0; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 4)}} + a[3][2.0] = f; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + // expected-error@-2 {{matrix column index is not an integer}} + (a[1])[1] = f; + // expected-error@-1 {{matrix row and column subscripts cannot be separated by any expression}} + + get_matrix()[0][0] = f; + // expected-error@-1 {{expression is not assignable}} + get_matrix()[3][1.0] = f; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + // expected-error@-2 {{matrix column index is not an integer}} + + (get_matrix()[0])[2] = f; + // expected-error@-1 {{matrix row and column subscripts cannot be separated by any expression}} + + a[4, 5] = 5.0; + // expected-error@-1 {{comma expressions are not allowed as indices in matrix subscript expressions}} + // expected-warning@-2 {{left operand of comma operator has no effect}} + + a[4, 5, 4] = 5.0; + // expected-error@-1 {{comma expressions are not allowed as indices in matrix subscript expressions}} + // expected-warning@-2 {{left operand of comma operator has no effect}} + // expected-warning@-3 {{left operand of comma operator has no effect}} +} + +void extract(float3x4 a, float f) { + // Non integer indexes. 
+ float v1 = a[2][f]; + // expected-error@-1 {{matrix column index is not an integer}} + float v2 = a[f][3]; + // expected-error@-1 {{matrix row index is not an integer}} + float v3 = a[f][f]; + // expected-error@-1 {{matrix row index is not an integer}} + // expected-error@-2 {{matrix column index is not an integer}} + + // Indexes outside allowed dimensions. + float v5 = a[-1][3]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + float v6 = a[2][-1]; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 4)}} + float v8 = a[-1u][3]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + float v9 = a[5][2]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + float v10 = a[2][4]; + // expected-error@-1 {{matrix column index is outside the allowed range [0, 4)}} + float v11 = a[3][2.0]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + // expected-error@-2 {{matrix column index is not an integer}} + + float v12 = get_matrix()[0][0]; + float v13 = get_matrix()[3][2.0]; + // expected-error@-1 {{matrix row index is outside the allowed range [0, 3)}} + // expected-error@-2 {{matrix column index is not an integer}} + +} + +template +struct MyMatrix { + using matrix_t = matrix; + + matrix_t value; +}; + +template +typename MyMatrix::matrix_t add(inout MyMatrix A, inout MyMatrix B) { + uint16_t v1 = A.value + B.value; + // expected-error@-1 {{cannot initialize a variable of type 'uint16_t' (aka 'unsigned short') with an rvalue of type 'matrix_t' (aka 'matrix')}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + // expected-error@-3 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + + return A.value + B.value; + // expected-error@-1 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 
'matrix_t' (aka 'matrix'))}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} +} + +void test_add_template() { + MyMatrix Mat1; + MyMatrix Mat2; + MyMatrix Mat3; + unsigned v1 = add(Mat1, Mat1); + // expected-error@-1 {{cannot initialize a variable of type 'unsigned int' with an rvalue of type 'typename MyMatrix::matrix_t' (aka 'matrix')}} + // expected-note@-2 {{in instantiation of function template specialization 'add' requested here}} + + Mat1.value = add(Mat1, Mat2); + // expected-note@-1 {{in instantiation of function template specialization 'add' requested here}} + + Mat1.value = add(Mat2, Mat3); + // expected-note@-1 {{in instantiation of function template specialization 'add' requested here}} +} + +template +typename MyMatrix::matrix_t subtract(inout MyMatrix A, inout MyMatrix B) { + uint16_t v1 = A.value - B.value; + // expected-error@-1 {{cannot initialize a variable of type 'uint16_t' (aka 'unsigned short') with an rvalue of type 'matrix_t' (aka 'matrix')}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix')}} + // expected-error@-3 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix')}} + + return A.value - B.value; + // expected-error@-1 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix')}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix')}} +} + +void test_subtract_template() { + MyMatrix Mat1; + MyMatrix Mat2; + MyMatrix Mat3; + unsigned v1 = subtract(Mat1, Mat1); + // expected-error@-1 {{cannot initialize a variable of type 'unsigned int' with an rvalue of type 'typename MyMatrix::matrix_t' (aka 'matrix')}} + // expected-note@-2 {{in instantiation of function template specialization 'subtract' requested here}} + + Mat1.value = subtract(Mat1, Mat2); + // 
expected-note@-1 {{in instantiation of function template specialization 'subtract' requested here}} + + Mat1.value = subtract(Mat2, Mat3); + // expected-note@-1 {{in instantiation of function template specialization 'subtract' requested here}} +} + +template +typename MyMatrix::matrix_t multiply(inout MyMatrix A, inout MyMatrix B) { + uint16_t v1 = A.value * B.value; + // expected-error@-1 {{cannot initialize a variable of type 'uint16_t' (aka 'unsigned short') with an rvalue of type 'matrix_t' (aka 'matrix')}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + // expected-error@-3 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + + MyMatrix m; + B.value = m.value * A.value; + // expected-error@-1 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + // expected-error@-3 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + + return A.value * B.value; + // expected-error@-1 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'matrix_t' (aka 'matrix'))}} +} + +void test_multiply_template() { + MyMatrix Mat1; + MyMatrix Mat2; + MyMatrix Mat3; + unsigned v1 = multiply(Mat1, Mat1); + // expected-note@-1 {{in instantiation of function template specialization 'multiply' requested here}} + // expected-error@-2 {{cannot initialize a variable of type 'unsigned int' with an rvalue of type 'typename MyMatrix::matrix_t' (aka 'matrix')}} + + MyMatrix Mat4; + Mat1.value = multiply(Mat4, Mat2); + // expected-note@-1 {{in instantiation of function template specialization 'multiply' requested here}} + + 
Mat1.value = multiply(Mat3, Mat1); + // expected-note@-1 {{in instantiation of function template specialization 'multiply' requested here}} + + Mat4.value = Mat4.value * Mat1; + // expected-error@-1 {{no viable conversion from 'MyMatrix' to 'unsigned int'}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'MyMatrix')}} +} + +struct UserT {}; + +struct StructWithC { + operator UserT() { + // expected-note@-1 4 {{candidate function}} + return {}; + } +}; + +void test_DoubleWrapper(inout MyMatrix m, inout StructWithC c) { + m.value = m.value + c; + // expected-error@-1 {{no viable conversion from 'StructWithC' to 'double'}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'StructWithC')}} + + m.value = c + m.value; + // expected-error@-1 {{no viable conversion from 'StructWithC' to 'double'}} + // expected-error@-2 {{invalid operands to binary expression ('StructWithC' and 'matrix_t' (aka 'matrix'))}} + + m.value = m.value - c; + // expected-error@-1 {{no viable conversion from 'StructWithC' to 'double'}} + // expected-error@-2 {{invalid operands to binary expression ('matrix_t' (aka 'matrix') and 'StructWithC')}} + + m.value = c - m.value; + // expected-error@-1 {{no viable conversion from 'StructWithC' to 'double'}} + // expected-error@-2 {{invalid operands to binary expression ('StructWithC' and 'matrix_t' (aka 'matrix'))}} +} + diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-type.hlsl b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-type.hlsl new file mode 100644 index 0000000000000..fe374f388d104 --- /dev/null +++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/matrix-type.hlsl @@ -0,0 +1,48 @@ +// A note points to the external source at present, so we have to ignore it. 
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -fnative-half-type -finclude-default-header -fsyntax-only %s -verify -verify-ignore-unexpected=note +// All the errors are actually in the external source at present, so we have to ignore them. +// The notes point to the proper lines though. +// RUN: %clang_cc1 -triple spirv-unknown-vulkan-compute -fnative-half-type -finclude-default-header -fsyntax-only -DMTXTYPE %s -verify=mtxtype -verify-ignore-unexpected=error + +#ifndef MTXTYPE +void matrix_var_dimensions(int Rows, unsigned Columns, uint16_t C) { + // expected-note@-1 3{{declared here}} + matrix m1; // expected-error{{non-type template argument is not a constant expression}} + // expected-note@-1{{function parameter 'Rows' with unknown value cannot be used in a constant expression}} + matrix m2; // expected-error{{non-type template argument is not a constant expression}} + // expected-note@-1{{function parameter 'Columns' with unknown value cannot be used in a constant expression}} + matrix m3; // expected-error{{non-type template argument is not a constant expression}} + // expected-note@-1{{function parameter 'C' with unknown value cannot be used in a constant expression}} + matrix m8; // expected-error{{template argument for non-type template parameter must be an expression}} + +} +#else +struct S1 {}; + +enum TestEnum { + A, + B +}; + +void matrix_unsupported_element_type() { + // The future-errors are not checked yet since they are predeclared and are ignored. 
+ matrix m1; // future-error{{invalid matrix element type 'S1'}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + matrix m2; // future-error{{invalid matrix element type 'bool'}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + matrix m3; // future-error{{invalid matrix element type 'TestEnum'}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + + matrix m4; // future-error{{matrix row size too large}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + matrix m5; // future-error{{matrix column size too large}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + matrix m6; // future-error{{zero matrix size}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + matrix m7; // future-error{{zero matrix size}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + matrix m9; // future-error{{matrix row size too large}} + // mtxtype-note@-1{{in instantiation of template type alias 'matrix' requested here}} + +} +#endif