Skip to content

Commit 3e5e7a0

Browse files
x64: convert unpack and interleave instructions to new assembler (#10842)
* unpcklps inst in new assembler, remove old inst
* unpcklpd inst in new assembler, remove old inst implementation
* unpckhps instruction
* punpckhbw instruction
* Punpckhwd instruction
* impl remaining unpack/interleave instructions
1 parent d8b369b commit 3e5e7a0

File tree

13 files changed

+65
-128
lines changed

13 files changed

+65
-128
lines changed

cranelift/assembler-x64/meta/src/instructions.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ mod or;
1414
mod shift;
1515
mod sqrt;
1616
mod sub;
17+
mod unpack;
1718
mod xor;
1819

1920
use crate::dsl::Inst;
@@ -36,5 +37,6 @@ pub fn list() -> Vec<Inst> {
3637
all.extend(sqrt::list());
3738
all.extend(sub::list());
3839
all.extend(xor::list());
40+
all.extend(unpack::list());
3941
all
4042
}
cranelift/assembler-x64/meta/src/instructions/unpack.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
use crate::dsl::{Feature::*, Inst, Location::*};
2+
use crate::dsl::{fmt, inst, r, rex, rw};
3+
4+
#[rustfmt::skip] // Keeps instructions on a single line.
5+
pub fn list() -> Vec<Inst> {
6+
vec![
7+
// Vector instructions.
8+
inst("unpcklps", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0xF, 0x14]).r(), _64b | compat | sse),
9+
inst("unpcklpd", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x14]).r(), _64b | compat | sse2),
10+
inst("unpckhps", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0xF, 0x15]).r(), _64b | compat | sse),
11+
inst("punpckhbw", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x68]).r(), _64b | compat | sse2),
12+
inst("punpckhwd", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x69]).r(), _64b | compat | sse2),
13+
inst("punpckhdq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x6A]).r(), _64b | compat | sse2),
14+
inst("punpcklwd", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x61]).r(), _64b | compat | sse2),
15+
inst("punpcklqdq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x6C]).r(), _64b | compat | sse2),
16+
inst("punpcklbw", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x60]).r(), _64b | compat | sse2),
17+
inst("punpckldq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x62]).r(), _64b | compat | sse2),
18+
inst("punpckhqdq", fmt("A", [rw(xmm1), r(xmm_m128)]), rex([0x66, 0x0F, 0x6D]).r(), _64b | compat | sse2),
19+
]
20+
}

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -816,10 +816,6 @@
816816
Pshufb
817817
Pshufd
818818
Ptest
819-
Punpckhbw
820-
Punpckhwd
821-
Punpcklbw
822-
Punpcklwd
823819
Rcpss
824820
Roundps
825821
Roundpd
@@ -829,13 +825,6 @@
829825
Shufps
830826
Ucomiss
831827
Ucomisd
832-
Unpcklps
833-
Unpcklpd
834-
Unpckhps
835-
Punpckhdq
836-
Punpckldq
837-
Punpckhqdq
838-
Punpcklqdq
839828
Pshuflw
840829
Pshufhw
841830
Pblendw
@@ -3445,71 +3434,71 @@
34453434
;; Helper for creating `punpckhwd` instructions.
34463435
(decl x64_punpckhwd (Xmm XmmMem) Xmm)
34473436
(rule 0 (x64_punpckhwd src1 src2)
3448-
(xmm_rm_r (SseOpcode.Punpckhwd) src1 src2))
3437+
(x64_punpckhwd_a src1 src2))
34493438
(rule 1 (x64_punpckhwd src1 src2)
34503439
(if-let true (use_avx))
34513440
(xmm_rmir_vex (AvxOpcode.Vpunpckhwd) src1 src2))
34523441

34533442
;; Helper for creating `punpcklwd` instructions.
34543443
(decl x64_punpcklwd (Xmm XmmMem) Xmm)
34553444
(rule 0 (x64_punpcklwd src1 src2)
3456-
(xmm_rm_r (SseOpcode.Punpcklwd) src1 src2))
3445+
(x64_punpcklwd_a src1 src2))
34573446
(rule 1 (x64_punpcklwd src1 src2)
34583447
(if-let true (use_avx))
34593448
(xmm_rmir_vex (AvxOpcode.Vpunpcklwd) src1 src2))
34603449

34613450
;; Helper for creating `punpckldq` instructions.
34623451
(decl x64_punpckldq (Xmm XmmMem) Xmm)
34633452
(rule 0 (x64_punpckldq src1 src2)
3464-
(xmm_rm_r (SseOpcode.Punpckldq) src1 src2))
3453+
(x64_punpckldq_a src1 src2))
34653454
(rule 1 (x64_punpckldq src1 src2)
34663455
(if-let true (use_avx))
34673456
(xmm_rmir_vex (AvxOpcode.Vpunpckldq) src1 src2))
34683457

34693458
;; Helper for creating `punpckhdq` instructions.
34703459
(decl x64_punpckhdq (Xmm XmmMem) Xmm)
34713460
(rule 0 (x64_punpckhdq src1 src2)
3472-
(xmm_rm_r (SseOpcode.Punpckhdq) src1 src2))
3461+
(x64_punpckhdq_a src1 src2))
34733462
(rule 1 (x64_punpckhdq src1 src2)
34743463
(if-let true (use_avx))
34753464
(xmm_rmir_vex (AvxOpcode.Vpunpckhdq) src1 src2))
34763465

34773466
;; Helper for creating `punpcklqdq` instructions.
34783467
(decl x64_punpcklqdq (Xmm XmmMem) Xmm)
34793468
(rule 0 (x64_punpcklqdq src1 src2)
3480-
(xmm_rm_r (SseOpcode.Punpcklqdq) src1 src2))
3469+
(x64_punpcklqdq_a src1 src2))
34813470
(rule 1 (x64_punpcklqdq src1 src2)
34823471
(if-let true (use_avx))
34833472
(xmm_rmir_vex (AvxOpcode.Vpunpcklqdq) src1 src2))
34843473

34853474
;; Helper for creating `punpckhqdq` instructions.
34863475
(decl x64_punpckhqdq (Xmm XmmMem) Xmm)
34873476
(rule 0 (x64_punpckhqdq src1 src2)
3488-
(xmm_rm_r (SseOpcode.Punpckhqdq) src1 src2))
3477+
(x64_punpckhqdq_a src1 src2))
34893478
(rule 1 (x64_punpckhqdq src1 src2)
34903479
(if-let true (use_avx))
34913480
(xmm_rmir_vex (AvxOpcode.Vpunpckhqdq) src1 src2))
34923481

34933482
;; Helper for creating `unpcklps` instructions.
34943483
(decl x64_unpcklps (Xmm XmmMem) Xmm)
34953484
(rule 0 (x64_unpcklps src1 src2)
3496-
(xmm_rm_r (SseOpcode.Unpcklps) src1 src2))
3485+
(x64_unpcklps_a src1 src2))
34973486
(rule 1 (x64_unpcklps src1 src2)
34983487
(if-let true (use_avx))
34993488
(xmm_rmir_vex (AvxOpcode.Vunpcklps) src1 src2))
35003489

35013490
;; Helper for creating `unpcklpd` instructions.
35023491
(decl x64_unpcklpd (Xmm XmmMem) Xmm)
35033492
(rule 0 (x64_unpcklpd src1 src2)
3504-
(xmm_rm_r (SseOpcode.Unpcklpd) src1 src2))
3493+
(x64_unpcklpd_a src1 src2))
35053494
(rule 1 (x64_unpcklpd src1 src2)
35063495
(if-let true (use_avx))
35073496
(xmm_rmir_vex (AvxOpcode.Vunpcklpd) src1 src2))
35083497

35093498
;; Helper for creating `unpckhps` instructions.
35103499
(decl x64_unpckhps (Xmm XmmMem) Xmm)
35113500
(rule 0 (x64_unpckhps src1 src2)
3512-
(xmm_rm_r (SseOpcode.Unpckhps) src1 src2))
3501+
(x64_unpckhps_a src1 src2))
35133502
(rule 1 (x64_unpckhps src1 src2)
35143503
(if-let true (use_avx))
35153504
(xmm_rmir_vex (AvxOpcode.Vunpckhps) src1 src2))
@@ -3804,15 +3793,15 @@
38043793
;; Helper for creating `punpcklbw` instructions.
38053794
(decl x64_punpcklbw (Xmm XmmMem) Xmm)
38063795
(rule 0 (x64_punpcklbw src1 src2)
3807-
(xmm_rm_r (SseOpcode.Punpcklbw) src1 src2))
3796+
(x64_punpcklbw_a src1 src2))
38083797
(rule 1 (x64_punpcklbw src1 src2)
38093798
(if-let true (use_avx))
38103799
(xmm_rmir_vex (AvxOpcode.Vpunpcklbw) src1 src2))
38113800

38123801
;; Helper for creating `punpckhbw` instructions.
38133802
(decl x64_punpckhbw (Xmm XmmMem) Xmm)
38143803
(rule 0 (x64_punpckhbw src1 src2)
3815-
(xmm_rm_r (SseOpcode.Punpckhbw) src1 src2))
3804+
(x64_punpckhbw_a src1 src2))
38163805
(rule 1 (x64_punpckhbw src1 src2)
38173806
(if-let true (use_avx))
38183807
(xmm_rmir_vex (AvxOpcode.Vpunpckhbw) src1 src2))

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 2 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -886,10 +886,6 @@ pub enum SseOpcode {
886886
Pshufb,
887887
Pshufd,
888888
Ptest,
889-
Punpckhbw,
890-
Punpckhwd,
891-
Punpcklbw,
892-
Punpcklwd,
893889
Rcpss,
894890
Roundps,
895891
Roundpd,
@@ -899,13 +895,6 @@ pub enum SseOpcode {
899895
Shufps,
900896
Ucomiss,
901897
Ucomisd,
902-
Unpcklps,
903-
Unpcklpd,
904-
Unpckhps,
905-
Punpckhdq,
906-
Punpckldq,
907-
Punpckhqdq,
908-
Punpcklqdq,
909898
Pshuflw,
910899
Pshufhw,
911900
Pblendw,
@@ -929,9 +918,7 @@ impl SseOpcode {
929918
| SseOpcode::Rcpss
930919
| SseOpcode::Rsqrtss
931920
| SseOpcode::Shufps
932-
| SseOpcode::Ucomiss
933-
| SseOpcode::Unpcklps
934-
| SseOpcode::Unpckhps => SSE,
921+
| SseOpcode::Ucomiss => SSE,
935922

936923
SseOpcode::Cmppd
937924
| SseOpcode::Cmpsd
@@ -960,18 +947,9 @@ impl SseOpcode {
960947
| SseOpcode::Pmullw
961948
| SseOpcode::Pmuludq
962949
| SseOpcode::Pshufd
963-
| SseOpcode::Punpckhbw
964-
| SseOpcode::Punpckhwd
965-
| SseOpcode::Punpcklbw
966-
| SseOpcode::Punpcklwd
967950
| SseOpcode::Ucomisd
968-
| SseOpcode::Punpckldq
969-
| SseOpcode::Punpckhdq
970-
| SseOpcode::Punpcklqdq
971-
| SseOpcode::Punpckhqdq
972951
| SseOpcode::Pshuflw
973-
| SseOpcode::Pshufhw
974-
| SseOpcode::Unpcklpd => SSE2,
952+
| SseOpcode::Pshufhw => SSE2,
975953

976954
SseOpcode::Pabsb
977955
| SseOpcode::Pabsw
@@ -1109,10 +1087,6 @@ impl fmt::Debug for SseOpcode {
11091087
SseOpcode::Pshufb => "pshufb",
11101088
SseOpcode::Pshufd => "pshufd",
11111089
SseOpcode::Ptest => "ptest",
1112-
SseOpcode::Punpckhbw => "punpckhbw",
1113-
SseOpcode::Punpckhwd => "punpckhwd",
1114-
SseOpcode::Punpcklbw => "punpcklbw",
1115-
SseOpcode::Punpcklwd => "punpcklwd",
11161090
SseOpcode::Rcpss => "rcpss",
11171091
SseOpcode::Roundps => "roundps",
11181092
SseOpcode::Roundpd => "roundpd",
@@ -1122,17 +1096,10 @@ impl fmt::Debug for SseOpcode {
11221096
SseOpcode::Shufps => "shufps",
11231097
SseOpcode::Ucomiss => "ucomiss",
11241098
SseOpcode::Ucomisd => "ucomisd",
1125-
SseOpcode::Unpcklps => "unpcklps",
1126-
SseOpcode::Unpckhps => "unpckhps",
1127-
SseOpcode::Punpckldq => "punpckldq",
1128-
SseOpcode::Punpckhdq => "punpckhdq",
1129-
SseOpcode::Punpcklqdq => "punpcklqdq",
1130-
SseOpcode::Punpckhqdq => "punpckhqdq",
11311099
SseOpcode::Pshuflw => "pshuflw",
11321100
SseOpcode::Pshufhw => "pshufhw",
11331101
SseOpcode::Pblendw => "pblendw",
11341102
SseOpcode::Movddup => "movddup",
1135-
SseOpcode::Unpcklpd => "unpcklpd",
11361103
};
11371104
write!(fmt, "{name}")
11381105
}

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,18 +1779,7 @@ pub(crate) fn emit(
17791779
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
17801780
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
17811781
SseOpcode::Pshufb => (LegacyPrefixes::_66, 0x0F3800, 3),
1782-
SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2),
1783-
SseOpcode::Punpckhwd => (LegacyPrefixes::_66, 0x0F69, 2),
1784-
SseOpcode::Punpcklbw => (LegacyPrefixes::_66, 0x0F60, 2),
1785-
SseOpcode::Punpcklwd => (LegacyPrefixes::_66, 0x0F61, 2),
1786-
SseOpcode::Punpckldq => (LegacyPrefixes::_66, 0x0F62, 2),
1787-
SseOpcode::Punpcklqdq => (LegacyPrefixes::_66, 0x0F6C, 2),
1788-
SseOpcode::Punpckhdq => (LegacyPrefixes::_66, 0x0F6A, 2),
1789-
SseOpcode::Punpckhqdq => (LegacyPrefixes::_66, 0x0F6D, 2),
1790-
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
1791-
SseOpcode::Unpckhps => (LegacyPrefixes::None, 0x0F15, 2),
17921782
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
1793-
SseOpcode::Unpcklpd => (LegacyPrefixes::_66, 0x0F14, 2),
17941783
_ => unimplemented!("Opcode {:?} not implemented", op),
17951784
};
17961785

cranelift/codegen/src/isa/x64/inst/emit_tests.rs

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2791,36 +2791,6 @@ fn test_x64_emit() {
27912791
"packuswb %xmm4, %xmm9, %xmm4",
27922792
));
27932793

2794-
insns.push((
2795-
Inst::xmm_rm_r(SseOpcode::Punpckhbw, RegMem::reg(xmm3), w_xmm2),
2796-
"660F68D3",
2797-
"punpckhbw %xmm2, %xmm3, %xmm2",
2798-
));
2799-
2800-
insns.push((
2801-
Inst::xmm_rm_r(SseOpcode::Punpckhwd, RegMem::reg(xmm13), w_xmm2),
2802-
"66410F69D5",
2803-
"punpckhwd %xmm2, %xmm13, %xmm2",
2804-
));
2805-
2806-
insns.push((
2807-
Inst::xmm_rm_r(SseOpcode::Punpcklbw, RegMem::reg(xmm1), w_xmm8),
2808-
"66440F60C1",
2809-
"punpcklbw %xmm8, %xmm1, %xmm8",
2810-
));
2811-
2812-
insns.push((
2813-
Inst::xmm_rm_r(SseOpcode::Punpcklwd, RegMem::reg(xmm11), w_xmm8),
2814-
"66450F61C3",
2815-
"punpcklwd %xmm8, %xmm11, %xmm8",
2816-
));
2817-
2818-
insns.push((
2819-
Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2),
2820-
"410F14D3",
2821-
"unpcklps %xmm2, %xmm11, %xmm2",
2822-
));
2823-
28242794
// ========================================================
28252795
// XMM_RM_R: Integer Conversion
28262796

cranelift/filetests/filetests/isa/x64/bitcast.clif

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ block0(v0: i128):
194194
; block0:
195195
; movq %rdi, %xmm0
196196
; movq %rsi, %xmm5
197-
; punpcklqdq %xmm0, %xmm5, %xmm0
197+
; punpcklqdq %xmm5, %xmm0
198198
; movq %rbp, %rsp
199199
; popq %rbp
200200
; ret
@@ -252,7 +252,7 @@ block0(v0: i128):
252252
; block0:
253253
; movq %rdi, %xmm0
254254
; movq %rsi, %xmm5
255-
; punpcklqdq %xmm0, %xmm5, %xmm0
255+
; punpcklqdq %xmm5, %xmm0
256256
; movq %rbp, %rsp
257257
; popq %rbp
258258
; ret

cranelift/filetests/filetests/isa/x64/fcvt.clif

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ block0(v0: i32x4):
338338
; pushq %rbp
339339
; movq %rsp, %rbp
340340
; block0:
341-
; unpcklps %xmm0, const(0), %xmm0
341+
; unpcklps (%rip), %xmm0
342342
; subpd (%rip), %xmm0
343343
; movq %rbp, %rsp
344344
; popq %rbp
@@ -1215,7 +1215,7 @@ block0(v0: i64x2):
12151215
; pshufd $238, %xmm6, %xmm2
12161216
; movq %xmm2, %rcx
12171217
; cvtsi2sdq %rcx, %xmm1
1218-
; unpcklpd %xmm0, %xmm1, %xmm0
1218+
; unpcklpd %xmm1, %xmm0
12191219
; movq %rbp, %rsp
12201220
; popq %rbp
12211221
; ret

0 commit comments

Comments
 (0)