diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3b268dcbca600..1dc33e9f65142 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3592,6 +3592,18 @@ let Predicates = [HasSVE_or_SME] in {
   def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
             (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
+
+  // Extracts of ``unsigned'' i8 or i16 elements lead to the zero-extend being
+  // transformed to an AND mask. The mask is redundant since UMOV already zeroes
+  // the high bits of the destination register.
+  def : Pat<(i32 (and (vector_extract nxv16i8:$vec, VectorIndexB:$index), 0xff)),
+            (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
+  def : Pat<(i32 (and (vector_extract nxv8i16:$vec, VectorIndexH:$index), 0xffff)),
+            (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>;
+  def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)))), (i64 0xff))),
+            (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)), sub_32)>;
+  def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)))), (i64 0xffff))),
+            (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)), sub_32)>;
   } // End HasNEON
 
   // Extract first element from vector.
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index c340df1385124..0cc2e04bfb315 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -12,6 +12,26 @@ define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) #0 {
   ret i8 %b
 }
 
+define i32 @test_lane0_16xi8_zext_i32(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane0_16xi8_zext_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[0]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 16 x i8> %a, i32 0
+  %c = zext i8 %b to i32
+  ret i32 %c
+}
+
+define i64 @test_lane0_16xi8_zext_i64(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane0_16xi8_zext_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[0]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 16 x i8> %a, i32 0
+  %c = zext i8 %b to i64
+  ret i64 %c
+}
+
 define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
 ; CHECK-LABEL: test_lane15_16xi8:
 ; CHECK:       // %bb.0:
@@ -21,6 +41,26 @@ define i8 @test_lane15_16xi8(<vscale x 16 x i8> %a) #0 {
   ret i8 %b
 }
 
+define i32 @test_lane15_16xi8_zext_i32(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane15_16xi8_zext_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[15]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 16 x i8> %a, i32 15
+  %c = zext i8 %b to i32
+  ret i32 %c
+}
+
+define i64 @test_lane15_16xi8_zext_i64(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane15_16xi8_zext_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[15]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 16 x i8> %a, i32 15
+  %c = zext i8 %b to i64
+  ret i64 %c
+}
+
 define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
 ; CHECK-LABEL: test_lane16_16xi8:
 ; CHECK:       // %bb.0:
@@ -31,6 +71,32 @@ define i8 @test_lane16_16xi8(<vscale x 16 x i8> %a) #0 {
   ret i8 %b
 }
 
+; FIXME: FMOV+AND -> UMOV.
+define i32 @test_lane16_16xi8_zext_i32(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane16_16xi8_zext_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, z0.b[16]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 16 x i8> %a, i32 16
+  %c = zext i8 %b to i32
+  ret i32 %c
+}
+
+; FIXME: FMOV+AND -> UMOV.
+define i64 @test_lane16_16xi8_zext_i64(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: test_lane16_16xi8_zext_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, z0.b[16]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 16 x i8> %a, i32 16
+  %c = zext i8 %b to i64
+  ret i64 %c
+}
+
 define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
 ; CHECK-LABEL: test_lane0_8xi16:
 ; CHECK:       // %bb.0:
@@ -40,6 +106,26 @@ define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) #0 {
   ret i16 %b
 }
 
+define i32 @test_lane0_8xi16_zext_i32(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane0_8xi16_zext_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.h[0]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x i16> %a, i32 0
+  %c = zext i16 %b to i32
+  ret i32 %c
+}
+
+define i64 @test_lane0_8xi16_zext_i64(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane0_8xi16_zext_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.h[0]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x i16> %a, i32 0
+  %c = zext i16 %b to i64
+  ret i64 %c
+}
+
 define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
 ; CHECK-LABEL: test_lane7_8xi16:
 ; CHECK:       // %bb.0:
@@ -49,6 +135,26 @@ define i16 @test_lane7_8xi16(<vscale x 8 x i16> %a) #0 {
   ret i16 %b
 }
 
+define i32 @test_lane7_8xi16_zext_i32(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane7_8xi16_zext_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.h[7]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x i16> %a, i32 7
+  %c = zext i16 %b to i32
+  ret i32 %c
+}
+
+define i64 @test_lane7_8xi16_zext_i64(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane7_8xi16_zext_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.h[7]
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x i16> %a, i32 7
+  %c = zext i16 %b to i64
+  ret i64 %c
+}
+
 define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
 ; CHECK-LABEL: test_lane8_8xi16:
 ; CHECK:       // %bb.0:
@@ -59,6 +165,32 @@ define i16 @test_lane8_8xi16(<vscale x 8 x i16> %a) #0 {
   ret i16 %b
 }
 
+; FIXME: FMOV+AND -> UMOV.
+define i32 @test_lane8_8xi16_zext_i32(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane8_8xi16_zext_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[8]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x i16> %a, i32 8
+  %c = zext i16 %b to i32
+  ret i32 %c
+}
+
+; FIXME: FMOV+AND -> UMOV.
+define i64 @test_lane8_8xi16_zext_i64(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: test_lane8_8xi16_zext_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[8]
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 8 x i16> %a, i32 8
+  %c = zext i16 %b to i64
+  ret i64 %c
+}
+
 define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xi32:
 ; CHECK:       // %bb.0: