Skip to content

Commit dbdb234

Browse files
committed
[LoongArch] Add patterns to support vector type average instructions generation
1 parent 1b4a585 commit dbdb234

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2015,6 +2015,56 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
20152015
(XVFTINTRZ_LU_D v4f64:$vj)),
20162016
sub_128)>;
20172017

2018+
// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
//
// Select the averaging instructions from the generic avgfloor{s,u} /
// avgceil{s,u} nodes instead of from an open-coded
//   (sra/srl (add $xj, $xk), splat 1)
//   (sra/srl (add (add $xj, $xk), splat 1), splat 1)
// An element-width `add` wraps on overflow (unsigned i8: 255 + 255 -> 254,
// and 254 >> 1 = 127), while XVAVG/XVAVGR form the average from the
// non-wrapping (N+1)-bit sum (255 for the same inputs). Matching the
// open-coded form therefore miscompiles every lane whose sum overflows.
// The avgfloor/avgceil nodes are defined on the widened sum, which is the
// semantics these instructions implement.
//
// NOTE(review): these patterns only fire when ISD::AVGFLOORS, AVGFLOORU,
// AVGCEILS and AVGCEILU are marked Legal for the LASX integer vector types
// in LoongArchISelLowering.cpp; confirm that change lands together with
// this one. Until then the patterns are correct but never matched.
defm : PatXrXr<avgfloors, "XVAVG">;
defm : PatXrXrU<avgflooru, "XVAVG">;
defm : PatXrXr<avgceils, "XVAVGR">;
defm : PatXrXrU<avgceilu, "XVAVGR">;
2067+
20182068
// XVABSD_{B/H/W/D}[U]
// Instantiate the PatXrXr / PatXrXrU multiclasses (defined outside this
// hunk) for the abds / abdu nodes with the "XVABSD" instruction-name prefix.
20192069
defm : PatXrXr<abds, "XVABSD">;
20202070
defm : PatXrXrU<abdu, "XVABSD">;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2153,6 +2153,56 @@ def : Pat<(f32 f32imm_vldi:$in),
21532153
// Select an f64 immediate accepted by f64imm_vldi: emit VLDI on the operand
// rewritten by to_f64imm_vldi, then take the sub_64 subregister as the f64.
def : Pat<(f64 f64imm_vldi:$in),
21542154
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
21552155

2156+
// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
//
// Select the averaging instructions from the generic avgfloor{s,u} /
// avgceil{s,u} nodes instead of from an open-coded
//   (sra/srl (add $vj, $vk), splat 1)
//   (sra/srl (add (add $vj, $vk), splat 1), splat 1)
// An element-width `add` wraps on overflow (unsigned i8: 255 + 255 -> 254,
// and 254 >> 1 = 127), while VAVG/VAVGR form the average from the
// non-wrapping (N+1)-bit sum (255 for the same inputs). Matching the
// open-coded form therefore miscompiles every lane whose sum overflows.
// The avgfloor/avgceil nodes are defined on the widened sum, which is the
// semantics these instructions implement.
//
// NOTE(review): these patterns only fire when ISD::AVGFLOORS, AVGFLOORU,
// AVGCEILS and AVGCEILU are marked Legal for the LSX integer vector types
// in LoongArchISelLowering.cpp; confirm that change lands together with
// this one. Until then the patterns are correct but never matched.
defm : PatVrVr<avgfloors, "VAVG">;
defm : PatVrVrU<avgflooru, "VAVG">;
defm : PatVrVr<avgceils, "VAVGR">;
defm : PatVrVrU<avgceilu, "VAVGR">;
2205+
21562206
// VABSD_{B/H/W/D}[U]
// Instantiate the PatVrVr / PatVrVrU multiclasses (defined outside this
// hunk) for the abds / abdu nodes with the "VABSD" instruction-name prefix.
21572207
defm : PatVrVr<abds, "VABSD">;
21582208
defm : PatVrVrU<abdu, "VABSD">;

0 commit comments

Comments
 (0)