Skip to content

Commit cedf636

Browse files
author
Michael Munday
committed
cmd/compile: add floating point min/max intrinsics on s390x
Add the VECTOR FP (MINIMUM|MAXIMUM) instructions to the assembler and use them in the compiler to implement the min and max built-ins for floating point types. Note: floating point registers are also accepted as operands of the single element instructions (those with a W prefix instead of a V prefix) to make integration into the compiler easier. Change-Id: I5f80a510bd248cf483cce95f1979bf63fbae7de6 Reviewed-on: https://go-review.googlesource.com/c/go/+/684715 Reviewed-by: Keith Randall <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Mark Freeman <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent 82a1921 commit cedf636

File tree

12 files changed

+160
-2
lines changed

12 files changed

+160
-2
lines changed

src/cmd/asm/internal/asm/testdata/s390x.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,18 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
540540
VSTRCZBS V18, V20, V22, V24 // e78240306f8a
541541
VSTRCZHS V18, V20, V22, V24 // e78241306f8a
542542
VSTRCZFS V18, V20, V22, V24 // e78242306f8a
543+
VFMAXSB $1, V2, V3, V4 // e742301020ef
544+
WFMAXSB $2, V5, V6, V7 // e775602820ef
545+
WFMAXSB $2, F5, F6, F7 // e775602820ef
546+
VFMAXDB $3, V8, V9, V10 // e7a8903030ef
547+
WFMAXDB $4, V11, V12, V13 // e7dbc04830ef
548+
WFMAXDB $4, F11, F12, F13 // e7dbc04830ef
549+
VFMINSB $7, V14, V15, V16 // e70ef07028ee
550+
WFMINSB $8, V17, V18, V19 // e73120882eee
551+
WFMINSB $8, F1, F2, F3 // e731208820ee
552+
VFMINDB $9, V20, V21, V22 // e76450903eee
553+
WFMINDB $10, V23, V24, V25 // e79780a83eee
554+
WFMINDB $10, F7, F8, F9 // e79780a830ee
543555

544556
RET
545557
RET foo(SB)

src/cmd/compile/internal/s390x/ssa.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
281281
case ssa.OpS390XCPSDR:
282282
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
283283
p.Reg = v.Args[0].Reg()
284+
case ssa.OpS390XWFMAXDB, ssa.OpS390XWFMAXSB,
285+
ssa.OpS390XWFMINDB, ssa.OpS390XWFMINSB:
286+
p := opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), 1 /* Java Math.Max() / Math.Min() semantics */)
287+
p.AddRestSource(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
284288
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
285289
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
286290
ssa.OpS390XMODD, ssa.OpS390XMODW,

src/cmd/compile/internal/ssa/_gen/S390X.rules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@
145145

146146
(Sqrt32 ...) => (FSQRTS ...)
147147

148+
(Max(64|32)F ...) => (WFMAX(D|S)B ...)
149+
(Min(64|32)F ...) => (WFMIN(D|S)B ...)
150+
148151
// Atomic loads and stores.
149152
// The SYNC instruction (fast-BCR-serialization) prevents store-load
150153
// reordering. Other sequences of memory operations (load-load,

src/cmd/compile/internal/ssa/_gen/S390XOps.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@ func init() {
222222
{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 clear sign bit
223223
{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0
224224

225+
// Single element vector floating point min / max instructions
226+
{name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
227+
{name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
228+
{name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
229+
{name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
230+
225231
// Round to integer, float64 only.
226232
//
227233
// aux | rounding mode

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 60 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteS390X.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssagen/ssa.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3986,7 +3986,7 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
39863986
if typ.IsFloat() {
39873987
hasIntrinsic := false
39883988
switch Arch.LinkArch.Family {
3989-
case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64:
3989+
case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64, sys.S390X:
39903990
hasIntrinsic = true
39913991
case sys.PPC64:
39923992
hasIntrinsic = buildcfg.GOPPC64 >= 9

src/cmd/internal/obj/s390x/a.out.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,14 @@ const (
715715
AWFLNDB
716716
AVFLPDB
717717
AWFLPDB
718+
AVFMAXDB
719+
AWFMAXDB
720+
AVFMAXSB
721+
AWFMAXSB
722+
AVFMINDB
723+
AWFMINDB
724+
AVFMINSB
725+
AWFMINSB
718726
AVFSQ
719727
AVFSQDB
720728
AWFSQDB

src/cmd/internal/obj/s390x/anames.go

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/s390x/asmz.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,11 @@ var optab = []Optab{
441441
{i: 119, as: AVERLLVG, a1: C_VREG, a2: C_VREG, a6: C_VREG},
442442
{i: 119, as: AVERLLVG, a1: C_VREG, a6: C_VREG},
443443

444+
// VRR-c floating point min/max
445+
{i: 128, as: AVFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
446+
{i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
447+
{i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_FREG, a3: C_FREG, a6: C_FREG},
448+
444449
// VRR-d
445450
{i: 120, as: AVACQ, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG},
446451

@@ -1480,6 +1485,14 @@ func buildop(ctxt *obj.Link) {
14801485
opset(AVFMSDB, r)
14811486
opset(AWFMSDB, r)
14821487
opset(AVPERM, r)
1488+
case AVFMAXDB:
1489+
opset(AVFMAXSB, r)
1490+
opset(AVFMINDB, r)
1491+
opset(AVFMINSB, r)
1492+
case AWFMAXDB:
1493+
opset(AWFMAXSB, r)
1494+
opset(AWFMINDB, r)
1495+
opset(AWFMINSB, r)
14831496
case AKM:
14841497
opset(AKMC, r)
14851498
opset(AKLMD, r)
@@ -2636,6 +2649,8 @@ const (
26362649
op_VUPLL uint32 = 0xE7D4 // VRR-a VECTOR UNPACK LOGICAL LOW
26372650
op_VUPL uint32 = 0xE7D6 // VRR-a VECTOR UNPACK LOW
26382651
op_VMSL uint32 = 0xE7B8 // VRR-d VECTOR MULTIPLY SUM LOGICAL
2652+
op_VFMAX uint32 = 0xE7EF // VRR-c VECTOR FP MAXIMUM
2653+
op_VFMIN uint32 = 0xE7EE // VRR-c VECTOR FP MINIMUM
26392654

26402655
// added in z15
26412656
op_KDSA uint32 = 0xB93A // FORMAT_RRE COMPUTE DIGITAL SIGNATURE AUTHENTICATION (KDSA)
@@ -4475,6 +4490,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
44754490
c.ctxt.Diag("padding byte register cannot be same as input or output register %v", p)
44764491
}
44774492
zRS(op_MVCLE, uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), uint32(d2), asm)
4493+
4494+
case 128: // VRR-c floating point max/min
4495+
op, m4, _ := vop(p.As)
4496+
m5 := singleElementMask(p.As)
4497+
m6 := uint32(c.vregoff(&p.From))
4498+
zVRRc(op, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), m6, m5, m4, asm)
44784499
}
44794500
}
44804501

0 commit comments

Comments
 (0)