Skip to content

Commit ff74127

Browse files
committed
x86_64: implement prefetch
1 parent 82eedf5 commit ff74127

File tree

4 files changed

+80
-21
lines changed

4 files changed

+80
-21
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2484,7 +2484,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
24842484
.reduce => try cg.airReduce(inst),
24852485
.reduce_optimized => try cg.airReduce(inst),
24862486
.aggregate_init => try cg.airAggregateInit(inst),
2487-
.prefetch => try cg.airPrefetch(inst),
24882487
// zig fmt: on
24892488

24902489
.arg => if (cg.debug_output != .none) {
@@ -76418,6 +76417,33 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
7641876417
}, cg);
7641976418
try res.finish(inst, &.{extra.init}, &ops, cg);
7642076419
},
76420+
// Lowers the AIR `prefetch` instruction. Depending on the cache kind, read/write intent,
// locality and available CPU features, this emits one prefetch instruction or nothing at all.
.prefetch => {
76421+
const prefetch = air_datas[@intFromEnum(inst)].prefetch;
76422+
var ops = try cg.tempsFromOperands(inst, .{prefetch.ptr});
76423+
switch (prefetch.cache) {
76424+
.instruction => {}, // prefetchi requires rip-relative addressing, which is currently non-trivial to emit from an arbitrary ptr value
76425+
// Write prefetch with locality <= 2: emit the `._wt1` variant when the target has `prefetchwt1`.
.data => if (prefetch.rw == .write and prefetch.locality <= 2 and cg.hasFeature(.prefetchwt1)) {
76426+
// NOTE(review): toSlicePtr/toLea presumably reduce the operand to an address that can be
// dereferenced as a memory operand below; confirm against their definitions.
try ops[0].toSlicePtr(cg);
76427+
while (try ops[0].toLea(cg)) {}
76428+
try cg.asmMemory(.{ ._wt1, .prefetch }, try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = .byte }));
76429+
// Any remaining write prefetch (locality 3, or no `prefetchwt1`): emit the `._w` variant when the target has `prfchw`.
} else if (prefetch.rw == .write and cg.hasFeature(.prfchw)) {
76430+
try ops[0].toSlicePtr(cg);
76431+
while (try ops[0].toLea(cg)) {}
76432+
try cg.asmMemory(.{ ._w, .prefetch }, try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = .byte }));
76433+
// Reads, and writes not handled above: emit a hinted prefetch if any of these features is present.
// When none is present, the chain has no final `else`, so no instruction is emitted.
} else if (cg.hasFeature(.sse) or cg.hasFeature(.prfchw) or cg.hasFeature(.prefetchi) or cg.hasFeature(.prefetchwt1)) {
76434+
try ops[0].toSlicePtr(cg);
76435+
while (try ops[0].toLea(cg)) {}
76436+
// Locality selects the hint: 0 -> nta, 1 -> t2, 2 -> t1, 3 -> t0.
// NOTE(review): the switch has no `else`, so locality is presumably a 2-bit value; confirm.
switch (prefetch.locality) {
76437+
0 => try cg.asmMemory(.{ ._nta, .prefetch }, try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = .byte })),
76438+
1 => try cg.asmMemory(.{ ._t2, .prefetch }, try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = .byte })),
76439+
2 => try cg.asmMemory(.{ ._t1, .prefetch }, try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = .byte })),
76440+
3 => try cg.asmMemory(.{ ._t0, .prefetch }, try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = .byte })),
76441+
}
76442+
},
76443+
}
76444+
// The result temp is initialized as `.void`; the instruction is finished with the pointer as its only operand.
const res = try cg.tempInit(.void, .none);
76445+
try res.finish(inst, &.{prefetch.ptr}, &ops, cg);
76446+
},
7642176447
.mul_add => |air_tag| if (use_old) try cg.airMulAdd(inst) else {
7642276448
const pl_op = air_datas[@intFromEnum(inst)].pl_op;
7642376449
const bin_op = cg.air.extraData(Air.Bin, pl_op.payload).data;
@@ -94743,11 +94769,6 @@ fn airUnionInit(self: *CodeGen, inst: Air.Inst.Index) !void {
9474394769
return self.finishAir(inst, result, .{ extra.init, .none, .none });
9474494770
}
9474594771

94746-
fn airPrefetch(self: *CodeGen, inst: Air.Inst.Index) !void {
94747-
const prefetch = self.air.instructions.items(.data)[@intFromEnum(inst)].prefetch;
94748-
return self.finishAir(inst, .unreach, .{ prefetch.ptr, .none, .none });
94749-
}
94750-
9475194772
fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void {
9475294773
const pt = self.pt;
9475394774
const zcu = pt.zcu;

src/arch/x86_64/Encoding.zig

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,15 @@ pub fn findByMnemonic(
7878
),
7979
.x86_64 => false,
8080
},
81-
inline .@"invpcid 64bit", .@"rdpid 64bit" => |tag| switch (target.cpu.arch) {
81+
inline .@"invpcid 64bit", .@"rdpid 64bit", .@"prefetchi 64bit" => |tag| switch (target.cpu.arch) {
8282
else => unreachable,
8383
.x86 => false,
8484
.x86_64 => std.Target.x86.featureSetHas(
8585
target.cpu.features,
8686
@field(std.Target.x86.Feature, @tagName(tag)[0 .. @tagName(tag).len - " 64bit".len]),
8787
),
8888
},
89+
.prefetch => std.Target.x86.featureSetHasAny(target.cpu.features, .{ .sse, .prfchw, .prefetchi, .prefetchwt1 }),
8990
inline else => |tag| has_features: {
9091
comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' ');
9192
comptime var features: []const std.Target.x86.Feature = &.{};
@@ -375,6 +376,7 @@ pub const Mnemonic = enum {
375376
orps,
376377
pextrw, pinsrw,
377378
pmaxsw, pmaxub, pminsw, pminub, pmovmskb,
379+
prefetchit0, prefetchit1, prefetchnta, prefetcht0, prefetcht1, prefetcht2, prefetchw, prefetchwt1,
378380
shufps,
379381
sqrtps, sqrtss,
380382
stmxcsr,
@@ -562,8 +564,7 @@ pub const Op = enum {
562564
r32_m8, r32_m16, r64_m16,
563565
m8, m16, m32, m64, m80, m128, m256,
564566
rel8, rel16, rel32,
565-
m,
566-
moffs,
567+
m, moffs, mrip8,
567568
sreg,
568569
st0, st, mm, mm_m64,
569570
xmm0, xmm, xmm_m8, xmm_m16, xmm_m32, xmm_m64, xmm_m128,
@@ -617,7 +618,7 @@ pub const Op = enum {
617618

618619
.mem => |mem| switch (mem) {
619620
.moffs => .moffs,
620-
.sib, .rip => switch (mem.bitSize(target)) {
621+
.sib => switch (mem.bitSize(target)) {
621622
0 => .m,
622623
8 => .m8,
623624
16 => .m16,
@@ -628,6 +629,16 @@ pub const Op = enum {
628629
256 => .m256,
629630
else => unreachable,
630631
},
632+
.rip => switch (mem.bitSize(target)) {
633+
0, 8 => .mrip8,
634+
16 => .m16,
635+
32 => .m32,
636+
64 => .m64,
637+
80 => .m80,
638+
128 => .m128,
639+
256 => .m256,
640+
else => unreachable,
641+
},
631642
},
632643

633644
.imm => |imm| switch (imm) {
@@ -680,7 +691,7 @@ pub const Op = enum {
680691

681692
pub fn immBitSize(op: Op) u64 {
682693
return switch (op) {
683-
.none, .moffs, .m, .sreg => unreachable,
694+
.none, .m, .moffs, .mrip8, .sreg => unreachable,
684695
.al, .cl, .dx, .rip, .eip, .ip, .r8, .rm8, .r32_m8 => unreachable,
685696
.ax, .r16, .rm16 => unreachable,
686697
.eax, .r32, .rm32, .r32_m16 => unreachable,
@@ -700,7 +711,7 @@ pub const Op = enum {
700711

701712
pub fn regBitSize(op: Op) u64 {
702713
return switch (op) {
703-
.none, .moffs, .m, .sreg => unreachable,
714+
.none, .m, .moffs, .mrip8, .sreg => unreachable,
704715
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
705716
.rel8, .rel16, .rel32 => unreachable,
706717
.m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable,
@@ -716,13 +727,13 @@ pub const Op = enum {
716727

717728
pub fn memBitSize(op: Op) u64 {
718729
return switch (op) {
719-
.none, .moffs, .m, .sreg => unreachable,
730+
.none, .m, .moffs, .sreg => unreachable,
720731
.unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable,
721732
.rel8, .rel16, .rel32 => unreachable,
722733
.al, .cl, .r8, .ax, .dx, .ip, .r16, .eax, .eip, .r32, .rax, .rip, .r64 => unreachable,
723734
.st0, .st, .mm, .xmm0, .xmm, .ymm => unreachable,
724735
.cr, .dr => unreachable,
725-
.m8, .rm8, .r32_m8, .xmm_m8 => 8,
736+
.mrip8, .m8, .rm8, .r32_m8, .xmm_m8 => 8,
726737
.m16, .rm16, .r32_m16, .r64_m16, .xmm_m16 => 16,
727738
.m32, .rm32, .xmm_m32 => 32,
728739
.m64, .rm64, .mm_m64, .xmm_m64 => 64,
@@ -783,7 +794,7 @@ pub const Op = enum {
783794
.rm8, .rm16, .rm32, .rm64,
784795
.r32_m8, .r32_m16, .r64_m16,
785796
.m8, .m16, .m32, .m64, .m80, .m128, .m256,
786-
.m,
797+
.m, .moffs, .mrip8,
787798
.mm_m64,
788799
.xmm_m8, .xmm_m16, .xmm_m32, .xmm_m64, .xmm_m128,
789800
.ymm_m256,
@@ -821,11 +832,7 @@ pub const Op = enum {
821832
/// Given an operand `op` checks if `target` is a subset for the purposes of the encoding.
822833
pub fn isSubset(op: Op, target: Op) bool {
823834
switch (op) {
824-
.moffs, .sreg => return op == target,
825-
.none => switch (target) {
826-
.none => return true,
827-
else => return false,
828-
},
835+
.none, .m, .moffs, .sreg => return op == target,
829836
else => {
830837
if (op.isRegister() and target.isRegister()) {
831838
return switch (target.toReg()) {
@@ -836,6 +843,7 @@ pub const Op = enum {
836843
if (op.isMemory() and target.isMemory()) {
837844
switch (target) {
838845
.m => return true,
846+
.moffs, .mrip8 => return op == target,
839847
else => return op.memBitSize() == target.memBitSize(),
840848
}
841849
}
@@ -962,6 +970,10 @@ pub const Feature = enum {
962970
@"pclmul avx",
963971
pku,
964972
popcnt,
973+
prefetch,
974+
@"prefetchi 64bit",
975+
prefetchwt1,
976+
prfchw,
965977
rdrnd,
966978
rdseed,
967979
@"rdpid 32bit",
@@ -1002,7 +1014,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
10021014
}
10031015

10041016
const mnemonic_to_encodings_map = init: {
1005-
@setEvalBranchQuota(5_700);
1017+
@setEvalBranchQuota(5_800);
10061018
const mnemonic_count = @typeInfo(Mnemonic).@"enum".fields.len;
10071019
var mnemonic_map: [mnemonic_count][]Data = @splat(&.{});
10081020
const encodings = @import("encodings.zig");

src/arch/x86_64/Mir.zig

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,18 @@ pub const Inst = struct {
3434
/// ___ 4
3535
_4,
3636

37+
/// ___ With NTA Hint
38+
_nta,
3739
/// System Call ___
3840
sys_,
41+
/// ___ With T0 Hint
42+
_t0,
43+
/// ___ With T1 Hint
44+
_t1,
45+
/// ___ With T2 Hint
46+
_t2,
47+
/// ___ With Intent to Write and T1 Hint
48+
_wt1,
3949

4050
/// ___ crement Shadow Stack Pointer Doubleword
4151
_csspd,
@@ -198,6 +208,7 @@ pub const Inst = struct {
198208
//_b,
199209
/// ___ Word
200210
/// ___ For Writing
211+
/// ___ With Intent to Write
201212
_w,
202213
/// ___ Doubleword
203214
//_d,
@@ -975,6 +986,9 @@ pub const Inst = struct {
975986
/// Move unaligned packed single-precision floating-point values
976987
/// Move unaligned packed double-precision floating-point values
977988
movu,
989+
/// Prefetch data into caches
990+
/// Prefetch data into caches with intent to write
991+
prefetch,
978992
/// Packed interleave shuffle of quadruplets of single-precision floating-point values
979993
/// Packed interleave shuffle of pairs of double-precision floating-point values
980994
/// Shuffle packed doublewords

src/arch/x86_64/encodings.zig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1370,6 +1370,18 @@ pub const table = [_]Entry{
13701370
.{ .pmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
13711371
.{ .pmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .none, .sse },
13721372

1373+
.{ .prefetchit0, .m, &.{ .mrip8 }, &.{ 0x0f, 0x18 }, 7, .none, .@"prefetchi 64bit" },
1374+
.{ .prefetchit1, .m, &.{ .mrip8 }, &.{ 0x0f, 0x18 }, 6, .none, .@"prefetchi 64bit" },
1375+
1376+
.{ .prefetchnta, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 0, .none, .prefetch },
1377+
.{ .prefetcht0, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 1, .none, .prefetch },
1378+
.{ .prefetcht1, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 2, .none, .prefetch },
1379+
.{ .prefetcht2, .m, &.{ .m8 }, &.{ 0x0f, 0x18 }, 3, .none, .prefetch },
1380+
1381+
.{ .prefetchw, .m, &.{ .m8 }, &.{ 0x0f, 0x0d }, 1, .none, .prfchw },
1382+
1383+
.{ .prefetchwt1, .m, &.{ .m8 }, &.{ 0x0f, 0x0d }, 2, .none, .prefetchwt1 },
1384+
13731385
.{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse },
13741386

13751387
.{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse },

0 commit comments

Comments
 (0)