Skip to content

Commit 696169a

Browse files
[SVE] Add isel patterns that match "FpImm - A" to the immediate form of FSUBR.
Differential Revision: https://reviews.llvm.org/D128200
1 parent: 84f486c; commit: 696169a

File tree

2 files changed

+153
-2
lines changed

2 files changed

+153
-2
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -297,6 +297,9 @@ def AArch64fmls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
297297
[(AArch64fma_p node:$pred, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op2, node:$op3),
298298
(AArch64fma_p node:$pred, node:$op2, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op3)]>;
299299

300+
def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
301+
(AArch64fsub_p node:$pg, node:$op2, node:$op1)>;
302+
300303
def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
301304
(AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
302305
return N->getFlags().hasNoSignedZeros();
@@ -460,11 +463,11 @@ let Predicates = [HasSVEorSME] in {
460463
defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
461464
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
462465
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
463-
466+
464467
defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fadd_p>;
465468
defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsub_p>;
466469
defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, AArch64fmul_p>;
467-
defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one>;
470+
defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsubr_p>;
468471
defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmaxnm_p>;
469472
defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fminnm_p>;
470473
defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmax_p>;

llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll

Lines changed: 148 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1037,6 +1037,154 @@ define <vscale x 2 x double> @fsub_d_immone(<vscale x 2 x double> %a) #0 {
10371037
ret <vscale x 2 x double> %out
10381038
}
10391039

1040+
;
1041+
; FSUBR
1042+
;
1043+
1044+
define <vscale x 8 x half> @fsubr_h_immhalf(<vscale x 8 x half> %a) #0 {
1045+
; CHECK-LABEL: fsubr_h_immhalf:
1046+
; CHECK: // %bb.0:
1047+
; CHECK-NEXT: ptrue p0.h
1048+
; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #0.5
1049+
; CHECK-NEXT: ret
1050+
%elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
1051+
%splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
1052+
%out = fsub <vscale x 8 x half> %splat, %a
1053+
ret <vscale x 8 x half> %out
1054+
}
1055+
1056+
define <vscale x 8 x half> @fsubr_h_immone(<vscale x 8 x half> %a) #0 {
1057+
; CHECK-LABEL: fsubr_h_immone:
1058+
; CHECK: // %bb.0:
1059+
; CHECK-NEXT: ptrue p0.h
1060+
; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #1.0
1061+
; CHECK-NEXT: ret
1062+
%elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
1063+
%splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
1064+
%out = fsub <vscale x 8 x half> %splat, %a
1065+
ret <vscale x 8 x half> %out
1066+
}
1067+
1068+
define <vscale x 4 x half> @fsubr_4h_immhalf(<vscale x 4 x half> %a) #0 {
1069+
; CHECK-LABEL: fsubr_4h_immhalf:
1070+
; CHECK: // %bb.0:
1071+
; CHECK-NEXT: ptrue p0.s
1072+
; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #0.5
1073+
; CHECK-NEXT: ret
1074+
%elt = insertelement <vscale x 4 x half> undef, half 0.500000e+00, i32 0
1075+
%splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
1076+
%out = fsub <vscale x 4 x half> %splat, %a
1077+
ret <vscale x 4 x half> %out
1078+
}
1079+
1080+
define <vscale x 4 x half> @fsubr_4h_immone(<vscale x 4 x half> %a) #0 {
1081+
; CHECK-LABEL: fsubr_4h_immone:
1082+
; CHECK: // %bb.0:
1083+
; CHECK-NEXT: ptrue p0.s
1084+
; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #1.0
1085+
; CHECK-NEXT: ret
1086+
%elt = insertelement <vscale x 4 x half> undef, half 1.000000e+00, i32 0
1087+
%splat = shufflevector <vscale x 4 x half> %elt, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
1088+
%out = fsub <vscale x 4 x half> %splat, %a
1089+
ret <vscale x 4 x half> %out
1090+
}
1091+
1092+
define <vscale x 2 x half> @fsubr_2h_immhalf(<vscale x 2 x half> %a) #0 {
1093+
; CHECK-LABEL: fsubr_2h_immhalf:
1094+
; CHECK: // %bb.0:
1095+
; CHECK-NEXT: ptrue p0.d
1096+
; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #0.5
1097+
; CHECK-NEXT: ret
1098+
%elt = insertelement <vscale x 2 x half> undef, half 0.500000e+00, i32 0
1099+
%splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
1100+
%out = fsub <vscale x 2 x half> %splat, %a
1101+
ret <vscale x 2 x half> %out
1102+
}
1103+
1104+
define <vscale x 2 x half> @fsubr_2h_immone(<vscale x 2 x half> %a) #0 {
1105+
; CHECK-LABEL: fsubr_2h_immone:
1106+
; CHECK: // %bb.0:
1107+
; CHECK-NEXT: ptrue p0.d
1108+
; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #1.0
1109+
; CHECK-NEXT: ret
1110+
%elt = insertelement <vscale x 2 x half> undef, half 1.000000e+00, i32 0
1111+
%splat = shufflevector <vscale x 2 x half> %elt, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
1112+
%out = fsub <vscale x 2 x half> %splat, %a
1113+
ret <vscale x 2 x half> %out
1114+
}
1115+
1116+
define <vscale x 4 x float> @fsubr_s_immhalf(<vscale x 4 x float> %a) #0 {
1117+
; CHECK-LABEL: fsubr_s_immhalf:
1118+
; CHECK: // %bb.0:
1119+
; CHECK-NEXT: ptrue p0.s
1120+
; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #0.5
1121+
; CHECK-NEXT: ret
1122+
%elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
1123+
%splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
1124+
%out = fsub <vscale x 4 x float> %splat, %a
1125+
ret <vscale x 4 x float> %out
1126+
}
1127+
1128+
define <vscale x 4 x float> @fsubr_s_immone(<vscale x 4 x float> %a) #0 {
1129+
; CHECK-LABEL: fsubr_s_immone:
1130+
; CHECK: // %bb.0:
1131+
; CHECK-NEXT: ptrue p0.s
1132+
; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #1.0
1133+
; CHECK-NEXT: ret
1134+
%elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
1135+
%splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
1136+
%out = fsub <vscale x 4 x float> %splat, %a
1137+
ret <vscale x 4 x float> %out
1138+
}
1139+
1140+
define <vscale x 2 x float> @fsubr_2s_immhalf(<vscale x 2 x float> %a) #0 {
1141+
; CHECK-LABEL: fsubr_2s_immhalf:
1142+
; CHECK: // %bb.0:
1143+
; CHECK-NEXT: ptrue p0.d
1144+
; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #0.5
1145+
; CHECK-NEXT: ret
1146+
%elt = insertelement <vscale x 2 x float> undef, float 0.500000e+00, i32 0
1147+
%splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
1148+
%out = fsub <vscale x 2 x float> %splat, %a
1149+
ret <vscale x 2 x float> %out
1150+
}
1151+
1152+
define <vscale x 2 x float> @fsubr_2s_immone(<vscale x 2 x float> %a) #0 {
1153+
; CHECK-LABEL: fsubr_2s_immone:
1154+
; CHECK: // %bb.0:
1155+
; CHECK-NEXT: ptrue p0.d
1156+
; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #1.0
1157+
; CHECK-NEXT: ret
1158+
%elt = insertelement <vscale x 2 x float> undef, float 1.000000e+00, i32 0
1159+
%splat = shufflevector <vscale x 2 x float> %elt, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
1160+
%out = fsub <vscale x 2 x float> %splat, %a
1161+
ret <vscale x 2 x float> %out
1162+
}
1163+
1164+
define <vscale x 2 x double> @fsubr_d_immhalf(<vscale x 2 x double> %a) #0 {
1165+
; CHECK-LABEL: fsubr_d_immhalf:
1166+
; CHECK: // %bb.0:
1167+
; CHECK-NEXT: ptrue p0.d
1168+
; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #0.5
1169+
; CHECK-NEXT: ret
1170+
%elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
1171+
%splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
1172+
%out = fsub <vscale x 2 x double> %splat, %a
1173+
ret <vscale x 2 x double> %out
1174+
}
1175+
1176+
define <vscale x 2 x double> @fsubr_d_immone(<vscale x 2 x double> %a) #0 {
1177+
; CHECK-LABEL: fsubr_d_immone:
1178+
; CHECK: // %bb.0:
1179+
; CHECK-NEXT: ptrue p0.d
1180+
; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #1.0
1181+
; CHECK-NEXT: ret
1182+
%elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
1183+
%splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
1184+
%out = fsub <vscale x 2 x double> %splat, %a
1185+
ret <vscale x 2 x double> %out
1186+
}
1187+
10401188
;; Arithmetic intrinsic declarations
10411189

10421190
declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)

0 commit comments

Comments
 (0)