@@ -210,3 +210,129 @@ define double @atomicrmw_fmaximum_double(ptr %ptr, double %value) {
210210 %res = atomicrmw fmaximum ptr %ptr , double %value seq_cst
211211 ret double %res
212212}
213+
; atomicrmw fmaximum with a bfloat operand.
; The assertions below expect a compare-exchange retry loop: an initial
; `load bfloat` (align 2) seeds the loop phi, @llvm.maximum.bf16 combines the
; loaded value with %val, the old and new values are bitcast to i16 for a
; seq_cst cmpxchg, and the loop repeats until the exchange reports success.
; The function returns the value loaded by the cmpxchg, bitcast back to bfloat.
define bfloat @atomicrmw_fmaximum_bfloat(ptr %ptr, bfloat %val) {
; CHECK-LABEL: @atomicrmw_fmaximum_bfloat(
; CHECK-NEXT:    [[TMP1:%.*]] = load bfloat, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi bfloat [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call bfloat @llvm.maximum.bf16(bfloat [[LOADED]], bfloat [[VAL:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast bfloat [[LOADED]] to i16
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0
; CHECK-NEXT:    [[TMP6]] = bitcast i16 [[NEWLOADED]] to bfloat
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    ret bfloat [[TMP6]]
;
  %res = atomicrmw fmaximum ptr %ptr, bfloat %val seq_cst
  ret bfloat %res
}
234+
; atomicrmw fmaximum with a half operand.
; The assertions below expect a compare-exchange retry loop: an initial
; `load half` (align 2) seeds the loop phi, @llvm.maximum.f16 combines the
; loaded value with %val, the old and new values are bitcast to i16 for a
; seq_cst cmpxchg, and the loop repeats until the exchange reports success.
; The function returns the value loaded by the cmpxchg, bitcast back to half.
define half @atomicrmw_fmaximum_half(ptr %ptr, half %val) {
; CHECK-LABEL: @atomicrmw_fmaximum_half(
; CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi half [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call half @llvm.maximum.f16(half [[LOADED]], half [[VAL:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[LOADED]] to i16
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0
; CHECK-NEXT:    [[TMP6]] = bitcast i16 [[NEWLOADED]] to half
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    ret half [[TMP6]]
;
  %res = atomicrmw fmaximum ptr %ptr, half %val seq_cst
  ret half %res
}
255+
; atomicrmw fmaximum with a <2 x half> vector operand.
; The assertions below expect a compare-exchange retry loop: an initial
; `load <2 x half>` (align 4) seeds the loop phi, @llvm.maximum.v2f16 combines
; the loaded vector with %val, the old and new vectors are bitcast to i32 for
; a seq_cst cmpxchg, and the loop repeats until the exchange reports success.
; The function returns the value loaded by the cmpxchg, bitcast back to
; <2 x half>.
define <2 x half> @atomicrmw_fmaximum_2_x_half(ptr %ptr, <2 x half> %val) {
; CHECK-LABEL: @atomicrmw_fmaximum_2_x_half(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x half> @llvm.maximum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VAL:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half>
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    ret <2 x half> [[TMP6]]
;
  %res = atomicrmw fmaximum ptr %ptr, <2 x half> %val seq_cst
  ret <2 x half> %res
}
276+
; atomicrmw fminimum with a bfloat operand.
; The assertions below expect a compare-exchange retry loop: an initial
; `load bfloat` (align 2) seeds the loop phi, @llvm.minimum.bf16 combines the
; loaded value with %val, the old and new values are bitcast to i16 for a
; seq_cst cmpxchg, and the loop repeats until the exchange reports success.
; The function returns the value loaded by the cmpxchg, bitcast back to bfloat.
define bfloat @atomicrmw_fminimum_bfloat(ptr %ptr, bfloat %val) {
; CHECK-LABEL: @atomicrmw_fminimum_bfloat(
; CHECK-NEXT:    [[TMP1:%.*]] = load bfloat, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi bfloat [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call bfloat @llvm.minimum.bf16(bfloat [[LOADED]], bfloat [[VAL:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast bfloat [[LOADED]] to i16
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0
; CHECK-NEXT:    [[TMP6]] = bitcast i16 [[NEWLOADED]] to bfloat
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    ret bfloat [[TMP6]]
;
  %res = atomicrmw fminimum ptr %ptr, bfloat %val seq_cst
  ret bfloat %res
}
297+
; atomicrmw fminimum with a half operand.
; The assertions below expect a compare-exchange retry loop: an initial
; `load half` (align 2) seeds the loop phi, @llvm.minimum.f16 combines the
; loaded value with %val, the old and new values are bitcast to i16 for a
; seq_cst cmpxchg, and the loop repeats until the exchange reports success.
; The function returns the value loaded by the cmpxchg, bitcast back to half.
define half @atomicrmw_fminimum_half(ptr %ptr, half %val) {
; CHECK-LABEL: @atomicrmw_fminimum_half(
; CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi half [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call half @llvm.minimum.f16(half [[LOADED]], half [[VAL:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half [[TMP2]] to i16
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[LOADED]] to i16
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i16 [[TMP4]], i16 [[TMP3]] seq_cst seq_cst, align 2
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i16, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i16, i1 } [[TMP5]], 0
; CHECK-NEXT:    [[TMP6]] = bitcast i16 [[NEWLOADED]] to half
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    ret half [[TMP6]]
;
  %res = atomicrmw fminimum ptr %ptr, half %val seq_cst
  ret half %res
}
318+
; atomicrmw fminimum with a <2 x half> vector operand.
; The assertions below expect a compare-exchange retry loop: an initial
; `load <2 x half>` (align 4) seeds the loop phi, @llvm.minimum.v2f16 combines
; the loaded vector with %val, the old and new vectors are bitcast to i32 for
; a seq_cst cmpxchg, and the loop repeats until the exchange reports success.
; The function returns the value loaded by the cmpxchg, bitcast back to
; <2 x half>.
define <2 x half> @atomicrmw_fminimum_2_x_half(ptr %ptr, <2 x half> %val) {
; CHECK-LABEL: @atomicrmw_fminimum_2_x_half(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
; CHECK:       atomicrmw.start:
; CHECK-NEXT:    [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x half> @llvm.minimum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VAL:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4
; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT:    [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT:    [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half>
; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK:       atomicrmw.end:
; CHECK-NEXT:    ret <2 x half> [[TMP6]]
;
  %res = atomicrmw fminimum ptr %ptr, <2 x half> %val seq_cst
  ret <2 x half> %res
}
0 commit comments