@@ -318,22 +318,14 @@ entry:
318318define float @f (ptr nocapture readonly %x ) {
319319; CHECK-LABEL: @f(
320320; CHECK-NEXT: entry:
321- ; CHECK-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
322- ; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
323- ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
324- ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
325- ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
326- ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
321+ ; CHECK-NEXT: [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
322+ ; CHECK-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
327323; CHECK-NEXT: ret float [[OP_RDX]]
328324;
329325; THRESHOLD-LABEL: @f(
330326; THRESHOLD-NEXT: entry:
331- ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <32 x float>, ptr [[X:%.*]], align 4
332- ; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, ptr [[X]], i64 32
333- ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[ARRAYIDX_32]], align 4
334- ; THRESHOLD-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> [[TMP0]])
335- ; THRESHOLD-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP1]])
336- ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP2]], [[TMP3]]
327+ ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <48 x float>, ptr [[X:%.*]], align 4
328+ ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v48f32(float 0.000000e+00, <48 x float> [[TMP0]])
337329; THRESHOLD-NEXT: ret float [[OP_RDX]]
338330;
339331 entry:
@@ -606,18 +598,14 @@ define float @loadadd31(ptr nocapture readonly %x) {
606598; CHECK-LABEL: @loadadd31(
607599; CHECK-NEXT: entry:
608600; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
609- ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
610- ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
611- ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
601+ ; CHECK-NEXT: [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
612602; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
613603; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
614604; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
615605; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
616606; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
617607; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
618- ; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
619- ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
620- ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
608+ ; CHECK-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
621609; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
622610; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
623611; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
@@ -627,18 +615,14 @@ define float @loadadd31(ptr nocapture readonly %x) {
627615; THRESHOLD-LABEL: @loadadd31(
628616; THRESHOLD-NEXT: entry:
629617; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i64 1
630- ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[ARRAYIDX]], align 4
631- ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, ptr [[X]], i64 17
632- ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX_16]], align 4
618+ ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <24 x float>, ptr [[ARRAYIDX]], align 4
633619; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, ptr [[X]], i64 25
634620; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX_24]], align 4
635621; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, ptr [[X]], i64 29
636622; THRESHOLD-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4
637623; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30
638624; THRESHOLD-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4
639- ; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP0]])
640- ; THRESHOLD-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
641- ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP5]], [[TMP6]]
625+ ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP0]])
642626; THRESHOLD-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
643627; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = fadd fast float [[OP_RDX]], [[TMP7]]
644628; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]]
0 commit comments