@@ -21,6 +21,7 @@ typedef float __attribute__((ext_vector_type(8))) float8;
2121typedef float __attribute__ ((ext_vector_type (16 ))) float16 ;
2222typedef float __attribute__ ((ext_vector_type (32 ))) float32 ;
2323typedef short __attribute__ ((ext_vector_type (2 ))) short2 ;
24+ typedef unsigned short __attribute__((ext_vector_type (2 ))) ushort2 ;
2425
2526// CHECK-LABEL: @test_setprio_inc_wg(
2627// CHECK-NEXT: entry:
@@ -1718,3 +1719,111 @@ void test_cvt_f32_fp8_e5m3(global int* out, int a)
17181719 * out = __builtin_amdgcn_cvt_f32_fp8_e5m3 (a , 2 );
17191720 * out = __builtin_amdgcn_cvt_f32_fp8_e5m3 (a , 3 );
17201721}
1722+
1723+ // CHECK-LABEL: @test_add_min_max(
1724+ // CHECK-NEXT: entry:
1725+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1726+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1727+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1728+ // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
1729+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1730+ // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
1731+ // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
1732+ // CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
1733+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1734+ // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
1735+ // CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
1736+ // CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
1737+ // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
1738+ // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
1739+ // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
1740+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.add.max.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false)
1741+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1742+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1743+ // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
1744+ // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
1745+ // CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
1746+ // CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.add.max.u32(i32 [[TMP5]], i32 [[TMP6]], i32 [[TMP7]], i1 true)
1747+ // CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1748+ // CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
1749+ // CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
1750+ // CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
1751+ // CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
1752+ // CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.add.min.i32(i32 [[TMP10]], i32 [[TMP11]], i32 [[TMP12]], i1 false)
1753+ // CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1754+ // CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
1755+ // CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
1756+ // CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
1757+ // CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
1758+ // CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.add.min.u32(i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP17]], i1 true)
1759+ // CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1760+ // CHECK-NEXT: store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
1761+ // CHECK-NEXT: ret void
1762+ //
1763+ void test_add_min_max (global int * out , int a , int b , int c )
1764+ {
1765+ * out = __builtin_amdgcn_add_max_i32 (a , b , c , false);
1766+ * out = __builtin_amdgcn_add_max_u32 (a , b , c , true);
1767+ * out = __builtin_amdgcn_add_min_i32 (a , b , c , false);
1768+ * out = __builtin_amdgcn_add_min_u32 (a , b , c , true);
1769+ }
1770+
1771+ // CHECK-LABEL: @test_pk_add_min_max(
1772+ // CHECK-NEXT: entry:
1773+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1774+ // CHECK-NEXT: [[UOUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
1775+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
1776+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
1777+ // CHECK-NEXT: [[C_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
1778+ // CHECK-NEXT: [[UA_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
1779+ // CHECK-NEXT: [[UB_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
1780+ // CHECK-NEXT: [[UC_ADDR:%.*]] = alloca <2 x i16>, align 4, addrspace(5)
1781+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
1782+ // CHECK-NEXT: [[UOUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[UOUT_ADDR]] to ptr
1783+ // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
1784+ // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
1785+ // CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
1786+ // CHECK-NEXT: [[UA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[UA_ADDR]] to ptr
1787+ // CHECK-NEXT: [[UB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[UB_ADDR]] to ptr
1788+ // CHECK-NEXT: [[UC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[UC_ADDR]] to ptr
1789+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
1790+ // CHECK-NEXT: store ptr addrspace(1) [[UOUT:%.*]], ptr [[UOUT_ADDR_ASCAST]], align 8
1791+ // CHECK-NEXT: store <2 x i16> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
1792+ // CHECK-NEXT: store <2 x i16> [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
1793+ // CHECK-NEXT: store <2 x i16> [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
1794+ // CHECK-NEXT: store <2 x i16> [[UA:%.*]], ptr [[UA_ADDR_ASCAST]], align 4
1795+ // CHECK-NEXT: store <2 x i16> [[UB:%.*]], ptr [[UB_ADDR_ASCAST]], align 4
1796+ // CHECK-NEXT: store <2 x i16> [[UC:%.*]], ptr [[UC_ADDR_ASCAST]], align 4
1797+ // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[A_ADDR_ASCAST]], align 4
1798+ // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[B_ADDR_ASCAST]], align 4
1799+ // CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[C_ADDR_ASCAST]], align 4
1800+ // CHECK-NEXT: [[TMP3:%.*]] = call <2 x i16> @llvm.amdgcn.pk.add.max.i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]], <2 x i16> [[TMP2]], i1 false)
1801+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1802+ // CHECK-NEXT: store <2 x i16> [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
1803+ // CHECK-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr [[UA_ADDR_ASCAST]], align 4
1804+ // CHECK-NEXT: [[TMP6:%.*]] = load <2 x i16>, ptr [[UB_ADDR_ASCAST]], align 4
1805+ // CHECK-NEXT: [[TMP7:%.*]] = load <2 x i16>, ptr [[UC_ADDR_ASCAST]], align 4
1806+ // CHECK-NEXT: [[TMP8:%.*]] = call <2 x i16> @llvm.amdgcn.pk.add.max.u16(<2 x i16> [[TMP5]], <2 x i16> [[TMP6]], <2 x i16> [[TMP7]], i1 true)
1807+ // CHECK-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[UOUT_ADDR_ASCAST]], align 8
1808+ // CHECK-NEXT: store <2 x i16> [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
1809+ // CHECK-NEXT: [[TMP10:%.*]] = load <2 x i16>, ptr [[A_ADDR_ASCAST]], align 4
1810+ // CHECK-NEXT: [[TMP11:%.*]] = load <2 x i16>, ptr [[B_ADDR_ASCAST]], align 4
1811+ // CHECK-NEXT: [[TMP12:%.*]] = load <2 x i16>, ptr [[C_ADDR_ASCAST]], align 4
1812+ // CHECK-NEXT: [[TMP13:%.*]] = call <2 x i16> @llvm.amdgcn.pk.add.min.i16(<2 x i16> [[TMP10]], <2 x i16> [[TMP11]], <2 x i16> [[TMP12]], i1 false)
1813+ // CHECK-NEXT: [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
1814+ // CHECK-NEXT: store <2 x i16> [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
1815+ // CHECK-NEXT: [[TMP15:%.*]] = load <2 x i16>, ptr [[UA_ADDR_ASCAST]], align 4
1816+ // CHECK-NEXT: [[TMP16:%.*]] = load <2 x i16>, ptr [[UB_ADDR_ASCAST]], align 4
1817+ // CHECK-NEXT: [[TMP17:%.*]] = load <2 x i16>, ptr [[UC_ADDR_ASCAST]], align 4
1818+ // CHECK-NEXT: [[TMP18:%.*]] = call <2 x i16> @llvm.amdgcn.pk.add.min.u16(<2 x i16> [[TMP15]], <2 x i16> [[TMP16]], <2 x i16> [[TMP17]], i1 true)
1819+ // CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[UOUT_ADDR_ASCAST]], align 8
1820+ // CHECK-NEXT: store <2 x i16> [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
1821+ // CHECK-NEXT: ret void
1822+ //
1823+ void test_pk_add_min_max (global short2 * out , global ushort2 * uout , short2 a , short2 b , short2 c , ushort2 ua , ushort2 ub , ushort2 uc )
1824+ {
1825+ * out = __builtin_amdgcn_pk_add_max_i16 (a , b , c , false);
1826+ * uout = __builtin_amdgcn_pk_add_max_u16 (ua , ub , uc , true);
1827+ * out = __builtin_amdgcn_pk_add_min_i16 (a , b , c , false);
1828+ * uout = __builtin_amdgcn_pk_add_min_u16 (ua , ub , uc , true);
1829+ }
0 commit comments