66;
77;============================ end_copyright_notice =============================
88
9- ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD16 %s
10- ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefix=SIMD32 %s
9+ ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPG -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD16,CHECK %s
10+ ; RUN: %opt %use_old_pass_manager% -GenXLowering -march=genx64 -mcpu=XeHPC -mtriple=spir64-unknown-unknown -S < %s | FileCheck --check-prefixes=SIMD32,CHECK %s
1111; REQUIRES: llvm_12_or_greater
1212
1313declare i32 @llvm.vector.reduce.add.v96i32 (<96 x i32 >)
@@ -20,6 +20,8 @@ declare float @llvm.vector.reduce.fmax.v96f32(<96 x float>)
2020declare i32 @llvm.vector.reduce.add.v14i32 (<14 x i32 >)
2121declare i32 @llvm.vector.reduce.add.v73i32 (<73 x i32 >)
2222
23+ declare float @llvm.vector.reduce.fadd.v16f32 (float , <16 x float >)
24+
2325define i32 @test_add (<96 x i32 > %src ) {
2426; SIMD16-LABEL: @test_add(
2527; SIMD16-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.genx.rdregioni.v16i32.v96i32.i16(<96 x i32> [[SRC:%.*]], i32 0, i32 16, i32 1, i16 0, i32 undef)
@@ -456,3 +458,24 @@ define float @test_fmin(<96 x float> %src) {
456458 %reduce = call reassoc float @llvm.vector.reduce.fmin.v96f32 (<96 x float > %src )
457459 ret float %reduce
458460}
461+
462+ define float @test_fadd_legal(<16 x float> %src) { ; reassoc fadd reduction of a <16 x float> source; the expectations below describe a halving tree (16 -> 8 -> 4 -> 2 -> 1) of rdregionf reads plus fadd, followed by a scalar fadd with the start value
463+ ; CHECK-LABEL: @test_fadd_legal(
464+ ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC:%.*]], i32 0, i32 8, i32 1, i16 0, i32 undef)
465+ ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.genx.rdregionf.v8f32.v16f32.i16(<16 x float> [[SRC]], i32 0, i32 8, i32 1, i16 32, i32 undef)
466+ ; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
467+ ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 0, i32 undef)
468+ ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.genx.rdregionf.v4f32.v8f32.i16(<8 x float> [[TMP3]], i32 0, i32 4, i32 1, i16 16, i32 undef)
469+ ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
470+ ; CHECK-NEXT: [[TMP7:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 0, i32 undef)
471+ ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.genx.rdregionf.v2f32.v4f32.i16(<4 x float> [[TMP6]], i32 0, i32 2, i32 1, i16 8, i32 undef)
472+ ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[TMP8]]
473+ ; CHECK-NEXT: [[TMP10:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 0, i32 undef)
474+ ; CHECK-NEXT: [[TMP11:%.*]] = call <1 x float> @llvm.genx.rdregionf.v1f32.v2f32.i16(<2 x float> [[TMP9]], i32 0, i32 1, i32 1, i16 4, i32 undef)
475+ ; CHECK-NEXT: [[TMP12:%.*]] = fadd <1 x float> [[TMP10]], [[TMP11]]
476+ ; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x float> [[TMP12]] to float
477+ ; CHECK-NEXT: [[RES:%.*]] = fadd float [[TMP13]], 0.000000e+00
478+ ; CHECK-NEXT: ret float [[RES]]
479+ %reduce = call reassoc float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %src) ; the 0.0 start value is the second operand of the final scalar fadd expected above
480+ ret float %reduce
481+ }
0 commit comments