@@ -92,40 +92,11 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp
92
92
ret void
93
93
}
94
94
95
- ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
96
- ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
97
- ; SI-NOT: xor
98
- define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #0 {
99
- %b_ptr = getelementptr float , ptr addrspace (1 ) %in , i32 1
100
- %a = load float , ptr addrspace (1 ) %in , align 4
101
- %b = load float , ptr addrspace (1 ) %b_ptr , align 4
102
- %result = fsub float %a , %b
103
- %neg.result = fsub nsz float -0 .0 , %result
104
- store float %neg.result , ptr addrspace (1 ) %out , align 4
105
- ret void
106
- }
107
-
108
- ; For some reason the attribute has a string "true" or "false", so
109
- ; make sure it is disabled and the fneg is not folded if it is not
110
- ; "true".
111
- ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
112
- ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
113
- ; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
114
- define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) #1 {
115
- %b_ptr = getelementptr float , ptr addrspace (1 ) %in , i32 1
116
- %a = load float , ptr addrspace (1 ) %in , align 4
117
- %b = load float , ptr addrspace (1 ) %b_ptr , align 4
118
- %result = fsub float %a , %b
119
- %neg.result = fsub float -0 .0 , %result
120
- store float %neg.result , ptr addrspace (1 ) %out , align 4
121
- ret void
122
- }
123
-
124
- ; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
95
+ ; FUNC-LABEL: {{^}}v_fsub_0_nsz_flag_f32:
125
96
; SI-NOT: v_sub
126
- define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) # 0 {
97
+ define amdgpu_kernel void @v_fsub_0_nsz_flag_f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in ) {
127
98
%a = load float , ptr addrspace (1 ) %in , align 4
128
- %result = fsub float %a , 0 .0
99
+ %result = fsub nsz float %a , 0 .0
129
100
store float %result , ptr addrspace (1 ) %out , align 4
130
101
ret void
131
102
}
0 commit comments