@@ -12,7 +12,7 @@ define amdgpu_kernel void @exp_f16(ptr addrspace(1) %ptr) {
1212  ; CHECK-NEXT:   [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1) 
1313  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
1414  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 
15-   ; CHECK-NEXT:   [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_EXP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
15+   ; CHECK-NEXT:   [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept  V_EXP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
1616  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
1717  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_EXP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 
1818  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] 
@@ -35,7 +35,7 @@ define amdgpu_kernel void @log_f16(ptr addrspace(1) %ptr) {
3535  ; CHECK-NEXT:   [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1) 
3636  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
3737  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 
38-   ; CHECK-NEXT:   [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_LOG_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
38+   ; CHECK-NEXT:   [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept  V_LOG_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
3939  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
4040  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_LOG_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 
4141  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] 
@@ -58,7 +58,7 @@ define amdgpu_kernel void @rcp_f16(ptr addrspace(1) %ptr) {
5858  ; CHECK-NEXT:   [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1) 
5959  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
6060  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 
61-   ; CHECK-NEXT:   [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RCP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
61+   ; CHECK-NEXT:   [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept  V_RCP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
6262  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
6363  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_RCP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 
6464  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] 
@@ -81,7 +81,7 @@ define amdgpu_kernel void @rsq_f16(ptr addrspace(1) %ptr) {
8181  ; CHECK-NEXT:   [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1) 
8282  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
8383  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 
84-   ; CHECK-NEXT:   [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RSQ_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
84+   ; CHECK-NEXT:   [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept  V_RSQ_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
8585  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
8686  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_RSQ_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 
8787  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] 
@@ -104,7 +104,7 @@ define amdgpu_kernel void @sqrt_f16(ptr addrspace(1) %ptr) {
104104  ; CHECK-NEXT:   [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1) 
105105  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
106106  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 
107-   ; CHECK-NEXT:   [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_SQRT_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
107+   ; CHECK-NEXT:   [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept  V_SQRT_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec 
108108  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF 
109109  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_SQRT_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 
110110  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] 
0 commit comments