Skip to content

Commit e6a5ec3

Browse files
authored
x64: Tidy up some handling of sinkable loads (#5840)
This commit refactors a bit of how sinkable loads are handled in the x64 backend. The intention is to bring most handling of sinkable loads up to date with the current state of the backend, since things have changed since these were originally introduced, namely automatic conversions between types in ISLE. For example, the `Value` type can be automatically converted to `RegMem` to perform load sinking, but some rules are still explicitly doing the matching themselves. Here I've removed explicit handling of immediates and sinkable loads when they're the right-hand-side of an operation. These cases are already handled by the "base case" when converting a `Value` to a `RegMemImm`. Instead, only rules explicitly for left-hand-side immediates and sinkable loads remain. This helps cut down on the number of explicit rules needed. Additionally, in the same manner that `Value` can be automatically converted to `RegMem`, I've added automatic conversions from `SinkableLoad` to `RegMem` and the various other newtypes. This helps cut down a bit on rule verbosity where `sink_load_*` is largely no longer necessary.
1 parent 0f51338 commit e6a5ec3

File tree

2 files changed

+32
-113
lines changed

2 files changed

+32
-113
lines changed

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4301,6 +4301,10 @@
43014301
(convert IntCC CC intcc_to_cc)
43024302
(convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op)
43034303

4304+
(convert SinkableLoad RegMem sink_load)
4305+
(convert SinkableLoad GprMemImm sink_load_to_gpr_mem_imm)
4306+
(convert SinkableLoad XmmMem sink_load_to_xmm_mem)
4307+
43044308
(decl reg_to_xmm_mem (Reg) XmmMem)
43054309
(rule (reg_to_xmm_mem r)
43064310
(xmm_to_xmm_mem (xmm_new r)))

cranelift/codegen/src/isa/x64/lower.isle

Lines changed: 28 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -46,29 +46,15 @@
4646
(iadd x y)))
4747
(x64_add ty x y))
4848

49-
;; Add a register and an immediate.
49+
;; The above case handles when the rhs is an immediate or a sinkable load, but
50+
;; additionally handle when the lhs meets these criteria.
5051

5152
(rule -4 (lower (has_type (fits_in_64 ty)
52-
(iadd x (simm32_from_value y))))
53-
(x64_add ty x y))
54-
55-
(rule -3 (lower (has_type (fits_in_64 ty)
5653
(iadd (simm32_from_value x) y)))
5754
(x64_add ty y x))
58-
59-
;; Add a register and memory.
60-
61-
(rule -2 (lower (has_type (fits_in_64 ty)
62-
(iadd x (sinkable_load y))))
63-
(x64_add ty
64-
x
65-
(sink_load_to_gpr_mem_imm y)))
66-
67-
(rule -1 (lower (has_type (fits_in_64 ty)
55+
(rule -3 (lower (has_type (fits_in_64 ty)
6856
(iadd (sinkable_load x) y)))
69-
(x64_add ty
70-
y
71-
(sink_load_to_gpr_mem_imm x)))
57+
(x64_add ty y x))
7258

7359
;; SSE.
7460

@@ -144,17 +130,6 @@
144130
(isub x y)))
145131
(x64_sub ty x y))
146132

147-
;; Sub a register and an immediate.
148-
(rule -2 (lower (has_type (fits_in_64 ty)
149-
(isub x (simm32_from_value y))))
150-
(x64_sub ty x y))
151-
152-
;; Sub a register and memory.
153-
(rule -1 (lower (has_type (fits_in_64 ty)
154-
(isub x (sinkable_load y))))
155-
(x64_sub ty x
156-
(sink_load_to_gpr_mem_imm y)))
157-
158133
;; SSE.
159134

160135
(rule (lower (has_type (multi_lane 8 16)
@@ -216,26 +191,14 @@
216191
(if (ty_int_ref_scalar_64 ty))
217192
(x64_and ty x y))
218193

219-
;; And with a memory operand.
220-
221-
(rule 1 (lower (has_type ty (band x (sinkable_load y))))
222-
(if (ty_int_ref_scalar_64 ty))
223-
(x64_and ty x
224-
(sink_load_to_gpr_mem_imm y)))
225-
226-
(rule 2 (lower (has_type ty (band (sinkable_load x) y)))
227-
(if (ty_int_ref_scalar_64 ty))
228-
(x64_and ty
229-
y
230-
(sink_load_to_gpr_mem_imm x)))
231-
232-
;; And with an immediate.
194+
;; The above case automatically handles when the rhs is an immediate or a
195+
;; sinkable load, but additionally handle the lhs here.
233196

234-
(rule 3 (lower (has_type ty (band x (simm32_from_value y))))
197+
(rule 1 (lower (has_type ty (band (sinkable_load x) y)))
235198
(if (ty_int_ref_scalar_64 ty))
236-
(x64_and ty x y))
199+
(x64_and ty y x))
237200

238-
(rule 4 (lower (has_type ty (band (simm32_from_value x) y)))
201+
(rule 2 (lower (has_type ty (band (simm32_from_value x) y)))
239202
(if (ty_int_ref_scalar_64 ty))
240203
(x64_and ty y x))
241204

@@ -308,25 +271,14 @@
308271
(if (ty_int_ref_scalar_64 ty))
309272
(x64_or ty x y))
310273

311-
;; Or with a memory operand.
312-
313-
(rule 1 (lower (has_type ty (bor x (sinkable_load y))))
314-
(if (ty_int_ref_scalar_64 ty))
315-
(x64_or ty x
316-
(sink_load_to_gpr_mem_imm y)))
274+
;; Handle immediates/sinkable loads on the lhs in addition to the automatic
275+
;; handling of the rhs above.
317276

318-
(rule 2 (lower (has_type ty (bor (sinkable_load x) y)))
277+
(rule 1 (lower (has_type ty (bor (sinkable_load x) y)))
319278
(if (ty_int_ref_scalar_64 ty))
320-
(x64_or ty y
321-
(sink_load_to_gpr_mem_imm x)))
322-
323-
;; Or with an immediate.
324-
325-
(rule 3 (lower (has_type ty (bor x (simm32_from_value y))))
326-
(if (ty_int_ref_scalar_64 ty))
327-
(x64_or ty x y))
279+
(x64_or ty y x))
328280

329-
(rule 4 (lower (has_type ty (bor (simm32_from_value x) y)))
281+
(rule 2 (lower (has_type ty (bor (simm32_from_value x) y)))
330282
(if (ty_int_ref_scalar_64 ty))
331283
(x64_or ty y x))
332284

@@ -371,23 +323,12 @@
371323
(if (ty_int_ref_scalar_64 ty))
372324
(x64_xor ty x y))
373325

374-
;; Xor with a memory operand.
326+
;; Handle xor with lhs immediates/sinkable loads in addition to the automatic
327+
;; handling of the rhs above.
375328

376-
(rule 1 (lower (has_type ty (bxor x (sinkable_load y))))
329+
(rule 1 (lower (has_type ty (bxor (sinkable_load x) y)))
377330
(if (ty_int_ref_scalar_64 ty))
378-
(x64_xor ty x
379-
(sink_load_to_gpr_mem_imm y)))
380-
381-
(rule 2 (lower (has_type ty (bxor (sinkable_load x) y)))
382-
(if (ty_int_ref_scalar_64 ty))
383-
(x64_xor ty y
384-
(sink_load_to_gpr_mem_imm x)))
385-
386-
;; Xor with an immediate.
387-
388-
(rule 3 (lower (has_type ty (bxor x (simm32_from_value y))))
389-
(if (ty_int_ref_scalar_64 ty))
390-
(x64_xor ty x y))
331+
(x64_xor ty y x))
391332

392333
(rule 4 (lower (has_type ty (bxor (simm32_from_value x) y)))
393334
(if (ty_int_ref_scalar_64 ty))
@@ -841,28 +782,15 @@
841782
(rule -5 (lower (has_type (fits_in_64 ty) (imul x y)))
842783
(x64_mul ty x y))
843784

844-
;; Multiply a register and an immediate.
845-
846-
(rule -3 (lower (has_type (fits_in_64 ty)
847-
(imul x (simm32_from_value y))))
848-
(x64_mul ty x y))
785+
;; Handle multiplication where the lhs is an immediate or sinkable load in
786+
;; addition to the automatic rhs handling above.
849787

850788
(rule -4 (lower (has_type (fits_in_64 ty)
851789
(imul (simm32_from_value x) y)))
852790
(x64_mul ty y x))
853-
854-
;; Multiply a register and a memory load.
855-
856-
(rule -2 (lower (has_type (fits_in_64 ty)
857-
(imul x (sinkable_load y))))
858-
(x64_mul ty
859-
x
860-
(sink_load_to_gpr_mem_imm y)))
861-
862-
(rule -1 (lower (has_type (fits_in_64 ty)
791+
(rule -3 (lower (has_type (fits_in_64 ty)
863792
(imul (sinkable_load x) y)))
864-
(x64_mul ty y
865-
(sink_load_to_gpr_mem_imm x)))
793+
(x64_mul ty y x))
866794

867795
;; `i128`.
868796

@@ -1459,32 +1387,19 @@
14591387
(x64_add_with_flags_paired ty a b)
14601388
(trap_if (CC.B) tc)))
14611389

1462-
;; Add a register and an immediate.
1390+
;; Handle lhs immediates/sinkable loads in addition to the automatic rhs
1391+
;; handling above.
14631392

14641393
(rule 1 (lower (has_type (fits_in_64 ty)
1465-
(uadd_overflow_trap a (simm32_from_value b) tc)))
1466-
(with_flags
1467-
(x64_add_with_flags_paired ty a b)
1468-
(trap_if (CC.B) tc)))
1469-
1470-
(rule 2 (lower (has_type (fits_in_64 ty)
14711394
(uadd_overflow_trap (simm32_from_value a) b tc)))
14721395
(with_flags
14731396
(x64_add_with_flags_paired ty b a)
14741397
(trap_if (CC.B) tc)))
14751398

1476-
;; Add a register and memory.
1477-
1478-
(rule 3 (lower (has_type (fits_in_64 ty)
1479-
(uadd_overflow_trap a (sinkable_load b) tc)))
1480-
(with_flags
1481-
(x64_add_with_flags_paired ty a (sink_load_to_gpr_mem_imm b))
1482-
(trap_if (CC.B) tc)))
1483-
1484-
(rule 4 (lower (has_type (fits_in_64 ty)
1399+
(rule 2 (lower (has_type (fits_in_64 ty)
14851400
(uadd_overflow_trap (sinkable_load a) b tc)))
14861401
(with_flags
1487-
(x64_add_with_flags_paired ty b (sink_load_to_gpr_mem_imm a))
1402+
(x64_add_with_flags_paired ty b a)
14881403
(trap_if (CC.B) tc)))
14891404

14901405
;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3603,9 +3518,9 @@
36033518
;; Case 3: when presented with `load + scalar_to_vector`, coalesce into a single
36043519
;; MOVSS/MOVSD instruction.
36053520
(rule 2 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_32 _)))))
3606-
(x64_movss_load (sink_load_to_xmm_mem src)))
3521+
(x64_movss_load src))
36073522
(rule 3 (lower (scalar_to_vector (and (sinkable_load src) (value_type (ty_64 _)))))
3608-
(x64_movsd_load (sink_load_to_xmm_mem src)))
3523+
(x64_movsd_load src))
36093524

36103525
;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36113526

0 commit comments

Comments
 (0)