Skip to content

Commit be31168

Browse files
authored
[s390x] Add full support for f128 (#10774)
This adds all missing operations to fully support f128 at the same level as f64 and f32 on s390x. The implementation is mostly a straightforward extension of existing support, using the same vector instruction set. The exception is conversion between integer and f128, where we have to use the older floating-point instructions that use FPR register pairs to hold an f128 value. As regalloc does not support pairs, those registers are hard-coded, just as is done for the few instructions that require GPR register pairs.
1 parent fc387e0 commit be31168

File tree

8 files changed

+2022
-91
lines changed

8 files changed

+2022
-91
lines changed

cranelift/codegen/src/isa/s390x/inst.isle

Lines changed: 153 additions & 31 deletions
Large diffs are not rendered by default.

cranelift/codegen/src/isa/s390x/inst/emit.rs

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,37 @@ macro_rules! debug_assert_valid_regpair {
4040
};
4141
}
4242

43+
// Debug-build sanity check for a floating-point register pair (`$hi`, `$lo`).
//
// When `debug_assertions` is enabled, both operands must resolve to real
// registers via `to_real_reg()`. The macro then asserts that the hardware
// encoding of `$hi` has bit 1 clear and that `$lo` is encoded exactly two
// above `$hi`. Any violation panics with a message that renders the
// offending registers through `show_reg`. When `debug_assertions` is
// disabled, the `cfg!` condition is false and none of the checks run.
//
// NOTE(review): presumably this mirrors the s390x rule for which FPR pairs
// may hold a 128-bit value -- confirm against the architecture manual.
macro_rules! debug_assert_valid_fp_regpair {
44+
($hi:expr, $lo:expr) => {
45+
if cfg!(debug_assertions) {
46+
match ($hi.to_real_reg(), $lo.to_real_reg()) {
47+
(Some(hi), Some(lo)) => {
48+
assert!(
49+
hi.hw_enc() & 2 == 0, // parses as `(hw_enc & 2) == 0`: bit 1 of the high encoding must be clear
50+
"High register is not valid: {}",
51+
show_reg($hi)
52+
);
53+
assert_eq!(
54+
hi.hw_enc() + 2, // the low half must be encoded exactly two above the high half
55+
lo.hw_enc(),
56+
"Low register is not valid: {}, {}",
57+
show_reg($hi),
58+
show_reg($lo)
59+
);
60+
}
61+
62+
_ => { // at least one operand did not resolve to a real register
63+
panic!(
64+
"Expected real registers for {} {}",
65+
show_reg($hi),
66+
show_reg($lo)
67+
);
68+
}
69+
}
70+
}
71+
};
72+
}
73+
4374
const OPCODE_BRAS: u16 = 0xa75;
4475
const OPCODE_BCR: u16 = 0xa74;
4576
const OPCODE_LDR: u16 = 0x28;
@@ -2396,24 +2427,29 @@ impl Inst {
23962427
let (opcode, m3, m4, m5, opcode_fpr) = match fpu_op {
23972428
FPUOp1::Abs32 => (0xe7cc, 2, 8, 2, Some(0xb300)), // WFPSO, LPEBR
23982429
FPUOp1::Abs64 => (0xe7cc, 3, 8, 2, Some(0xb310)), // WFPSO, LPDBR
2430+
FPUOp1::Abs128 => (0xe7cc, 4, 8, 2, None), // WFPSO
23992431
FPUOp1::Abs32x4 => (0xe7cc, 2, 0, 2, None), // VFPSO
24002432
FPUOp1::Abs64x2 => (0xe7cc, 3, 0, 2, None), // VFPSO
24012433
FPUOp1::Neg32 => (0xe7cc, 2, 8, 0, Some(0xb303)), // WFPSO, LCEBR
24022434
FPUOp1::Neg64 => (0xe7cc, 3, 8, 0, Some(0xb313)), // WFPSO, LCDBR
2435+
FPUOp1::Neg128 => (0xe7cc, 4, 8, 0, None), // WFPSO
24032436
FPUOp1::Neg32x4 => (0xe7cc, 2, 0, 0, None), // VFPSO
24042437
FPUOp1::Neg64x2 => (0xe7cc, 3, 0, 0, None), // VFPSO
24052438
FPUOp1::NegAbs32 => (0xe7cc, 2, 8, 1, Some(0xb301)), // WFPSO, LNEBR
24062439
FPUOp1::NegAbs64 => (0xe7cc, 3, 8, 1, Some(0xb311)), // WFPSO, LNDBR
2440+
FPUOp1::NegAbs128 => (0xe7cc, 4, 8, 1, None), // WFPSO
24072441
FPUOp1::NegAbs32x4 => (0xe7cc, 2, 0, 1, None), // VFPSO
24082442
FPUOp1::NegAbs64x2 => (0xe7cc, 3, 0, 1, None), // VFPSO
24092443
FPUOp1::Sqrt32 => (0xe7ce, 2, 8, 0, Some(0xb314)), // WFSQ, SQEBR
24102444
FPUOp1::Sqrt64 => (0xe7ce, 3, 8, 0, Some(0xb315)), // WFSQ, SQDBR
2445+
FPUOp1::Sqrt128 => (0xe7ce, 4, 8, 0, None), // WFSQ
24112446
FPUOp1::Sqrt32x4 => (0xe7ce, 2, 0, 0, None), // VFSQ
24122447
FPUOp1::Sqrt64x2 => (0xe7ce, 3, 0, 0, None), // VFSQ
24132448
FPUOp1::Cvt32To64 => (0xe7c4, 2, 8, 0, Some(0xb304)), // WFLL, LDEBR
24142449
FPUOp1::Cvt32x4To64x2 => (0xe7c4, 2, 0, 0, None), // VFLL
2450+
FPUOp1::Cvt64To128 => (0xe7c4, 3, 8, 0, None), // WFLL
24152451
};
2416-
if m4 == 8 && is_fpr(rd.to_reg()) && is_fpr(rn) {
2452+
if m4 == 8 && opcode_fpr.is_some() && is_fpr(rd.to_reg()) && is_fpr(rn) {
24172453
put(sink, &enc_rre(opcode_fpr.unwrap(), rd.to_reg(), rn));
24182454
} else {
24192455
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, m4, m5));
@@ -2423,34 +2459,42 @@ impl Inst {
24232459
let (opcode, m4, m5, m6, opcode_fpr) = match fpu_op {
24242460
FPUOp2::Add32 => (0xe7e3, 2, 8, 0, Some(0xb30a)), // WFA, AEBR
24252461
FPUOp2::Add64 => (0xe7e3, 3, 8, 0, Some(0xb31a)), // WFA, ADBR
2462+
FPUOp2::Add128 => (0xe7e3, 4, 8, 0, None), // WFA
24262463
FPUOp2::Add32x4 => (0xe7e3, 2, 0, 0, None), // VFA
24272464
FPUOp2::Add64x2 => (0xe7e3, 3, 0, 0, None), // VFA
24282465
FPUOp2::Sub32 => (0xe7e2, 2, 8, 0, Some(0xb30b)), // WFS, SEBR
24292466
FPUOp2::Sub64 => (0xe7e2, 3, 8, 0, Some(0xb31b)), // WFS, SDBR
2467+
FPUOp2::Sub128 => (0xe7e2, 4, 8, 0, None), // WFS
24302468
FPUOp2::Sub32x4 => (0xe7e2, 2, 0, 0, None), // VFS
24312469
FPUOp2::Sub64x2 => (0xe7e2, 3, 0, 0, None), // VFS
24322470
FPUOp2::Mul32 => (0xe7e7, 2, 8, 0, Some(0xb317)), // WFM, MEEBR
24332471
FPUOp2::Mul64 => (0xe7e7, 3, 8, 0, Some(0xb31c)), // WFM, MDBR
2472+
FPUOp2::Mul128 => (0xe7e7, 4, 8, 0, None), // WFM
24342473
FPUOp2::Mul32x4 => (0xe7e7, 2, 0, 0, None), // VFM
24352474
FPUOp2::Mul64x2 => (0xe7e7, 3, 0, 0, None), // VFM
24362475
FPUOp2::Div32 => (0xe7e5, 2, 8, 0, Some(0xb30d)), // WFD, DEBR
24372476
FPUOp2::Div64 => (0xe7e5, 3, 8, 0, Some(0xb31d)), // WFD, DDBR
2477+
FPUOp2::Div128 => (0xe7e5, 4, 8, 0, None), // WFD
24382478
FPUOp2::Div32x4 => (0xe7e5, 2, 0, 0, None), // VFD
24392479
FPUOp2::Div64x2 => (0xe7e5, 3, 0, 0, None), // VFD
24402480
FPUOp2::Max32 => (0xe7ef, 2, 8, 1, None), // WFMAX
24412481
FPUOp2::Max64 => (0xe7ef, 3, 8, 1, None), // WFMAX
2482+
FPUOp2::Max128 => (0xe7ef, 4, 8, 1, None), // WFMAX
24422483
FPUOp2::Max32x4 => (0xe7ef, 2, 0, 1, None), // VFMAX
24432484
FPUOp2::Max64x2 => (0xe7ef, 3, 0, 1, None), // VFMAX
24442485
FPUOp2::Min32 => (0xe7ee, 2, 8, 1, None), // WFMIN
24452486
FPUOp2::Min64 => (0xe7ee, 3, 8, 1, None), // WFMIN
2487+
FPUOp2::Min128 => (0xe7ee, 4, 8, 1, None), // WFMIN
24462488
FPUOp2::Min32x4 => (0xe7ee, 2, 0, 1, None), // VFMIN
24472489
FPUOp2::Min64x2 => (0xe7ee, 3, 0, 1, None), // VFMIN
24482490
FPUOp2::MaxPseudo32 => (0xe7ef, 2, 8, 3, None), // WFMAX
24492491
FPUOp2::MaxPseudo64 => (0xe7ef, 3, 8, 3, None), // WFMAX
2492+
FPUOp2::MaxPseudo128 => (0xe7ef, 4, 8, 3, None), // WFMAX
24502493
FPUOp2::MaxPseudo32x4 => (0xe7ef, 2, 0, 3, None), // VFMAX
24512494
FPUOp2::MaxPseudo64x2 => (0xe7ef, 3, 0, 3, None), // VFMAX
24522495
FPUOp2::MinPseudo32 => (0xe7ee, 2, 8, 3, None), // WFMIN
24532496
FPUOp2::MinPseudo64 => (0xe7ee, 3, 8, 3, None), // WFMIN
2497+
FPUOp2::MinPseudo128 => (0xe7ee, 4, 8, 3, None), // WFMIN
24542498
FPUOp2::MinPseudo32x4 => (0xe7ee, 2, 0, 3, None), // VFMIN
24552499
FPUOp2::MinPseudo64x2 => (0xe7ee, 3, 0, 3, None), // VFMIN
24562500
};
@@ -2471,14 +2515,22 @@ impl Inst {
24712515
let (opcode, m5, m6, opcode_fpr) = match fpu_op {
24722516
FPUOp3::MAdd32 => (0xe78f, 8, 2, Some(0xb30e)), // WFMA, MAEBR
24732517
FPUOp3::MAdd64 => (0xe78f, 8, 3, Some(0xb31e)), // WFMA, MADBR
2518+
FPUOp3::MAdd128 => (0xe78f, 8, 4, None), // WFMA
24742519
FPUOp3::MAdd32x4 => (0xe78f, 0, 2, None), // VFMA
24752520
FPUOp3::MAdd64x2 => (0xe78f, 0, 3, None), // VFMA
24762521
FPUOp3::MSub32 => (0xe78e, 8, 2, Some(0xb30f)), // WFMS, MSEBR
24772522
FPUOp3::MSub64 => (0xe78e, 8, 3, Some(0xb31f)), // WFMS, MSDBR
2523+
FPUOp3::MSub128 => (0xe78e, 8, 4, None), // WFMS
24782524
FPUOp3::MSub32x4 => (0xe78e, 0, 2, None), // VFMS
24792525
FPUOp3::MSub64x2 => (0xe78e, 0, 3, None), // VFMS
24802526
};
2481-
if m5 == 8 && rd.to_reg() == ra && is_fpr(rn) && is_fpr(rm) && is_fpr(ra) {
2527+
if m5 == 8
2528+
&& opcode_fpr.is_some()
2529+
&& rd.to_reg() == ra
2530+
&& is_fpr(rn)
2531+
&& is_fpr(rm)
2532+
&& is_fpr(ra)
2533+
{
24822534
put(sink, &enc_rrd(opcode_fpr.unwrap(), rd.to_reg(), rm, rn));
24832535
} else {
24842536
put(sink, &enc_vrr_e(opcode, rd.to_reg(), rn, rm, ra, m5, m6));
@@ -2497,8 +2549,10 @@ impl Inst {
24972549
let (opcode, m3, m4, opcode_fpr) = match op {
24982550
FpuRoundOp::Cvt64To32 => (0xe7c5, 3, 8, Some(0xb344)), // WFLR, LEDBR(A)
24992551
FpuRoundOp::Cvt64x2To32x4 => (0xe7c5, 3, 0, None), // VFLR
2552+
FpuRoundOp::Cvt128To64 => (0xe7c5, 4, 8, None), // WFLR
25002553
FpuRoundOp::Round32 => (0xe7c7, 2, 8, Some(0xb357)), // WFI, FIEBR
25012554
FpuRoundOp::Round64 => (0xe7c7, 3, 8, Some(0xb35f)), // WFI, FIDBR
2555+
FpuRoundOp::Round128 => (0xe7c7, 4, 8, None), // WFI
25022556
FpuRoundOp::Round32x4 => (0xe7c7, 2, 0, None), // VFI
25032557
FpuRoundOp::Round64x2 => (0xe7c7, 3, 0, None), // VFI
25042558
FpuRoundOp::ToSInt32 => (0xe7c2, 2, 8, None), // WCSFP
@@ -2527,6 +2581,50 @@ impl Inst {
25272581
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, m4, mode));
25282582
}
25292583
}
2584+
&Inst::FpuConv128FromInt { op, mode, rd, rn } => {
2585+
let rd1 = rd.hi;
2586+
let rd2 = rd.lo;
2587+
debug_assert_valid_fp_regpair!(rd1.to_reg(), rd2.to_reg());
2588+
2589+
let mode = match mode {
2590+
FpuRoundMode::Current => 0,
2591+
FpuRoundMode::ToNearest => 1,
2592+
FpuRoundMode::ShorterPrecision => 3,
2593+
FpuRoundMode::ToNearestTiesToEven => 4,
2594+
FpuRoundMode::ToZero => 5,
2595+
FpuRoundMode::ToPosInfinity => 6,
2596+
FpuRoundMode::ToNegInfinity => 7,
2597+
};
2598+
let opcode = match op {
2599+
FpuConv128Op::SInt32 => 0xb396, // CXFBRA
2600+
FpuConv128Op::SInt64 => 0xb3a6, // CXGBRA
2601+
FpuConv128Op::UInt32 => 0xb392, // CXLFBR
2602+
FpuConv128Op::UInt64 => 0xb3a2, // CXLGBR
2603+
};
2604+
put(sink, &enc_rrf_cde(opcode, rd1.to_reg(), rn, mode, 0));
2605+
}
2606+
&Inst::FpuConv128ToInt { op, mode, rd, rn } => {
2607+
let rn1 = rn.hi;
2608+
let rn2 = rn.lo;
2609+
debug_assert_valid_fp_regpair!(rn1, rn2);
2610+
2611+
let mode = match mode {
2612+
FpuRoundMode::Current => 0,
2613+
FpuRoundMode::ToNearest => 1,
2614+
FpuRoundMode::ShorterPrecision => 3,
2615+
FpuRoundMode::ToNearestTiesToEven => 4,
2616+
FpuRoundMode::ToZero => 5,
2617+
FpuRoundMode::ToPosInfinity => 6,
2618+
FpuRoundMode::ToNegInfinity => 7,
2619+
};
2620+
let opcode = match op {
2621+
FpuConv128Op::SInt32 => 0xb39a, // CFXBRA
2622+
FpuConv128Op::SInt64 => 0xb3aa, // CGXBRA
2623+
FpuConv128Op::UInt32 => 0xb39e, // CLFXBR
2624+
FpuConv128Op::UInt64 => 0xb3ae, // CLGXBR
2625+
};
2626+
put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn1, mode, 0));
2627+
}
25302628
&Inst::FpuCmp32 { rn, rm } => {
25312629
if is_fpr(rn) && is_fpr(rm) {
25322630
let opcode = 0xb309; // CEBR
@@ -2545,6 +2643,10 @@ impl Inst {
25452643
put(sink, &enc_vrr_a(opcode, rn, rm, 3, 0, 0));
25462644
}
25472645
}
2646+
&Inst::FpuCmp128 { rn, rm } => {
2647+
let opcode = 0xe7cb; // WFC
2648+
put(sink, &enc_vrr_a(opcode, rn, rm, 4, 0, 0));
2649+
}
25482650

25492651
&Inst::VecRRR { op, rd, rn, rm } => {
25502652
let (opcode, m4) = match op {

0 commit comments

Comments
 (0)