Skip to content

Commit add2976

Browse files
committed
compiler: implement better shuffle AIR
Runtime `@shuffle` has two cases which backends generally want to handle differently for efficiency: * One runtime vector operand; some result elements may be comptime-known * Two runtime vector operands; some result elements may be undefined The latter case happens if both vectors given to `@shuffle` are runtime-known and they are both used (i.e. the mask refers to them). Otherwise, if the result is not entirely comptime-known, we are in the former case. `Sema` now differentiates these two cases in the AIR so that backends can easily handle them however they want to. Note that this *doesn't* really involve Sema doing any more work than it would otherwise need to, so there's not really a negative here! Most existing backends have their lowerings for `@shuffle` migrated in this commit. The LLVM backend uses new lowerings suggested by Jacob as ones which it will handle effectively. The x86_64 backend has not yet been migrated; for now there's a panic in there. Jacob will implement that before this is merged anywhere.
1 parent b48d6ff commit add2976

File tree

18 files changed

+755
-321
lines changed

18 files changed

+755
-321
lines changed

src/Air.zig

Lines changed: 119 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -699,9 +699,21 @@ pub const Inst = struct {
699699
/// equal to the scalar value.
700700
/// Uses the `ty_op` field.
701701
splat,
702-
/// Constructs a vector by selecting elements from `a` and `b` based on `mask`.
703-
/// Uses the `ty_pl` field with payload `Shuffle`.
704-
shuffle,
702+
/// Constructs a vector by selecting elements from a single vector based on a mask. Each
703+
/// mask element is either an index into the vector, or a comptime-known value, or "undef".
704+
/// Uses the `ty_pl` field, where the payload index points to:
705+
/// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty`
706+
/// 2. operand: Ref // guaranteed not to be an interned value
707+
/// See `unwrapShuffleOne`.
708+
shuffle_one,
709+
/// Constructs a vector by selecting elements from two vectors based on a mask. Each mask
710+
/// element is either an index into one of the vectors, or "undef".
711+
/// Uses the `ty_pl` field, where the payload index points to:
712+
/// 1. mask_elem: ShuffleTwoMask // for each `mask_len`, which comes from `ty_pl.ty`
713+
/// 2. operand_a: Ref // guaranteed not to be an interned value
714+
/// 3. operand_b: Ref // guaranteed not to be an interned value
715+
/// See `unwrapShuffleTwo`.
716+
shuffle_two,
705717
/// Constructs a vector element-wise from `a` or `b` based on `pred`.
706718
/// Uses the `pl_op` field with `pred` as operand, and payload `Bin`.
707719
select,
@@ -1299,13 +1311,6 @@ pub const FieldParentPtr = struct {
12991311
field_index: u32,
13001312
};
13011313

1302-
pub const Shuffle = struct {
1303-
a: Inst.Ref,
1304-
b: Inst.Ref,
1305-
mask: InternPool.Index,
1306-
mask_len: u32,
1307-
};
1308-
13091314
pub const VectorCmp = struct {
13101315
lhs: Inst.Ref,
13111316
rhs: Inst.Ref,
@@ -1320,6 +1325,64 @@ pub const VectorCmp = struct {
13201325
}
13211326
};
13221327

1328+
/// Used by `Inst.Tag.shuffle_one`. Represents a mask element which either indexes into a
1329+
/// runtime-known vector, or is a comptime-known value.
1330+
pub const ShuffleOneMask = packed struct(u32) {
1331+
index: u31,
1332+
kind: enum(u1) { elem, value },
1333+
pub fn elem(idx: u32) ShuffleOneMask {
1334+
return .{ .index = @intCast(idx), .kind = .elem };
1335+
}
1336+
pub fn value(val: Value) ShuffleOneMask {
1337+
return .{ .index = @intCast(@intFromEnum(val.toIntern())), .kind = .value };
1338+
}
1339+
pub const Unwrapped = union(enum) {
1340+
/// The resulting element is this index into the runtime vector.
1341+
elem: u32,
1342+
/// The resulting element is this comptime-known value.
1343+
/// It is correctly typed. It might be `undefined`.
1344+
value: InternPool.Index,
1345+
};
1346+
pub fn unwrap(raw: ShuffleOneMask) Unwrapped {
1347+
return switch (raw.kind) {
1348+
.elem => .{ .elem = raw.index },
1349+
.value => .{ .value = @enumFromInt(raw.index) },
1350+
};
1351+
}
1352+
};
1353+
1354+
/// Used by `Inst.Tag.shuffle_two`. Represents a mask element which either indexes into one
1355+
/// of two runtime-known vectors, or is undefined.
1356+
pub const ShuffleTwoMask = enum(u32) {
1357+
undef = std.math.maxInt(u32),
1358+
_,
1359+
pub fn aElem(idx: u32) ShuffleTwoMask {
1360+
return @enumFromInt(idx << 1);
1361+
}
1362+
pub fn bElem(idx: u32) ShuffleTwoMask {
1363+
return @enumFromInt(idx << 1 | 1);
1364+
}
1365+
pub const Unwrapped = union(enum) {
1366+
/// The resulting element is this index into the first runtime vector.
1367+
a_elem: u32,
1368+
/// The resulting element is this index into the second runtime vector.
1369+
b_elem: u32,
1370+
/// The resulting element is `undefined`.
1371+
undef,
1372+
};
1373+
pub fn unwrap(raw: ShuffleTwoMask) Unwrapped {
1374+
switch (raw) {
1375+
.undef => return .undef,
1376+
_ => {},
1377+
}
1378+
const x = @intFromEnum(raw);
1379+
return switch (@as(u1, @truncate(x))) {
1380+
0 => .{ .a_elem = x >> 1 },
1381+
1 => .{ .b_elem = x >> 1 },
1382+
};
1383+
}
1384+
};
1385+
13231386
/// Trailing:
13241387
/// 0. `Inst.Ref` for every outputs_len
13251388
/// 1. `Inst.Ref` for every inputs_len
@@ -1503,7 +1566,6 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
15031566
.cmpxchg_weak,
15041567
.cmpxchg_strong,
15051568
.slice,
1506-
.shuffle,
15071569
.aggregate_init,
15081570
.union_init,
15091571
.field_parent_ptr,
@@ -1517,6 +1579,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
15171579
.ptr_sub,
15181580
.try_ptr,
15191581
.try_ptr_cold,
1582+
.shuffle_one,
1583+
.shuffle_two,
15201584
=> return datas[@intFromEnum(inst)].ty_pl.ty.toType(),
15211585

15221586
.not,
@@ -1903,7 +1967,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
19031967
.reduce,
19041968
.reduce_optimized,
19051969
.splat,
1906-
.shuffle,
1970+
.shuffle_one,
1971+
.shuffle_two,
19071972
.select,
19081973
.is_named_enum_value,
19091974
.tag_name,
@@ -2030,6 +2095,48 @@ pub fn unwrapSwitch(air: *const Air, switch_inst: Inst.Index) UnwrappedSwitch {
20302095
};
20312096
}
20322097

2098+
pub fn unwrapShuffleOne(air: *const Air, zcu: *const Zcu, inst_index: Inst.Index) struct {
2099+
result_ty: Type,
2100+
operand: Inst.Ref,
2101+
mask: []const ShuffleOneMask,
2102+
} {
2103+
const inst = air.instructions.get(@intFromEnum(inst_index));
2104+
switch (inst.tag) {
2105+
.shuffle_one => {},
2106+
else => unreachable, // assertion failure
2107+
}
2108+
const result_ty: Type = .fromInterned(inst.data.ty_pl.ty.toInterned().?);
2109+
const mask_len: u32 = result_ty.vectorLen(zcu);
2110+
const extra_idx = inst.data.ty_pl.payload;
2111+
return .{
2112+
.result_ty = result_ty,
2113+
.operand = @enumFromInt(air.extra.items[extra_idx + mask_len]),
2114+
.mask = @ptrCast(air.extra.items[extra_idx..][0..mask_len]),
2115+
};
2116+
}
2117+
2118+
pub fn unwrapShuffleTwo(air: *const Air, zcu: *const Zcu, inst_index: Inst.Index) struct {
2119+
result_ty: Type,
2120+
operand_a: Inst.Ref,
2121+
operand_b: Inst.Ref,
2122+
mask: []const ShuffleTwoMask,
2123+
} {
2124+
const inst = air.instructions.get(@intFromEnum(inst_index));
2125+
switch (inst.tag) {
2126+
.shuffle_two => {},
2127+
else => unreachable, // assertion failure
2128+
}
2129+
const result_ty: Type = .fromInterned(inst.data.ty_pl.ty.toInterned().?);
2130+
const mask_len: u32 = result_ty.vectorLen(zcu);
2131+
const extra_idx = inst.data.ty_pl.payload;
2132+
return .{
2133+
.result_ty = result_ty,
2134+
.operand_a = @enumFromInt(air.extra.items[extra_idx + mask_len + 0]),
2135+
.operand_b = @enumFromInt(air.extra.items[extra_idx + mask_len + 1]),
2136+
.mask = @ptrCast(air.extra.items[extra_idx..][0..mask_len]),
2137+
};
2138+
}
2139+
20332140
pub const typesFullyResolved = types_resolved.typesFullyResolved;
20342141
pub const typeFullyResolved = types_resolved.checkType;
20352142
pub const valFullyResolved = types_resolved.checkVal;

src/Air/Legalize.zig

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,8 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
521521
}
522522
},
523523
.splat,
524-
.shuffle,
524+
.shuffle_one,
525+
.shuffle_two,
525526
=> {},
526527
.select,
527528
=> if (l.features.contains(.scalarize_select)) continue :inst try l.scalarize(inst, .select_pl_op_bin),

src/Air/Liveness.zig

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ const Liveness = @This();
1515
const trace = @import("../tracy.zig").trace;
1616
const Air = @import("../Air.zig");
1717
const InternPool = @import("../InternPool.zig");
18+
const Zcu = @import("../Zcu.zig");
1819

1920
pub const Verify = @import("Liveness/Verify.zig");
2021

@@ -136,12 +137,15 @@ fn LivenessPassData(comptime pass: LivenessPass) type {
136137
};
137138
}
138139

139-
pub fn analyze(gpa: Allocator, air: Air, intern_pool: *InternPool) Allocator.Error!Liveness {
140+
pub fn analyze(zcu: *Zcu, air: Air, intern_pool: *InternPool) Allocator.Error!Liveness {
140141
const tracy = trace(@src());
141142
defer tracy.end();
142143

144+
const gpa = zcu.gpa;
145+
143146
var a: Analysis = .{
144147
.gpa = gpa,
148+
.zcu = zcu,
145149
.air = air,
146150
.tomb_bits = try gpa.alloc(
147151
usize,
@@ -220,6 +224,7 @@ const OperandCategory = enum {
220224
pub fn categorizeOperand(
221225
l: Liveness,
222226
air: Air,
227+
zcu: *Zcu,
223228
inst: Air.Inst.Index,
224229
operand: Air.Inst.Index,
225230
ip: *const InternPool,
@@ -511,10 +516,15 @@ pub fn categorizeOperand(
511516
if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 2, .none);
512517
return .none;
513518
},
514-
.shuffle => {
515-
const extra = air.extraData(Air.Shuffle, air_datas[@intFromEnum(inst)].ty_pl.payload).data;
516-
if (extra.a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
517-
if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
519+
.shuffle_one => {
520+
const unwrapped = air.unwrapShuffleOne(zcu, inst);
521+
if (unwrapped.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
522+
return .none;
523+
},
524+
.shuffle_two => {
525+
const unwrapped = air.unwrapShuffleTwo(zcu, inst);
526+
if (unwrapped.operand_a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
527+
if (unwrapped.operand_b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
518528
return .none;
519529
},
520530
.reduce, .reduce_optimized => {
@@ -639,7 +649,7 @@ pub fn categorizeOperand(
639649

640650
var operand_live: bool = true;
641651
for (&[_]Air.Inst.Index{ then_body[0], else_body[0] }) |cond_inst| {
642-
if (l.categorizeOperand(air, cond_inst, operand, ip) == .tomb)
652+
if (l.categorizeOperand(air, zcu, cond_inst, operand, ip) == .tomb)
643653
operand_live = false;
644654

645655
switch (air_tags[@intFromEnum(cond_inst)]) {
@@ -824,6 +834,7 @@ pub const BigTomb = struct {
824834
/// In-progress data; on successful analysis converted into `Liveness`.
825835
const Analysis = struct {
826836
gpa: Allocator,
837+
zcu: *Zcu,
827838
air: Air,
828839
intern_pool: *InternPool,
829840
tomb_bits: []usize,
@@ -1119,9 +1130,13 @@ fn analyzeInst(
11191130
const extra = a.air.extraData(Air.Bin, pl_op.payload).data;
11201131
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, extra.lhs, extra.rhs });
11211132
},
1122-
.shuffle => {
1123-
const extra = a.air.extraData(Air.Shuffle, inst_datas[@intFromEnum(inst)].ty_pl.payload).data;
1124-
return analyzeOperands(a, pass, data, inst, .{ extra.a, extra.b, .none });
1133+
.shuffle_one => {
1134+
const unwrapped = a.air.unwrapShuffleOne(a.zcu, inst);
1135+
return analyzeOperands(a, pass, data, inst, .{ unwrapped.operand, .none, .none });
1136+
},
1137+
.shuffle_two => {
1138+
const unwrapped = a.air.unwrapShuffleTwo(a.zcu, inst);
1139+
return analyzeOperands(a, pass, data, inst, .{ unwrapped.operand_a, unwrapped.operand_b, .none });
11251140
},
11261141
.reduce, .reduce_optimized => {
11271142
const reduce = inst_datas[@intFromEnum(inst)].reduce;

src/Air/Liveness/Verify.zig

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Verifies that Liveness information is valid.
22

33
gpa: std.mem.Allocator,
4+
zcu: *Zcu,
45
air: Air,
56
liveness: Liveness,
67
live: LiveMap = .{},
@@ -287,10 +288,13 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
287288
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
288289
try self.verifyInstOperands(inst, .{ extra.lhs, extra.rhs, .none });
289290
},
290-
.shuffle => {
291-
const ty_pl = data[@intFromEnum(inst)].ty_pl;
292-
const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
293-
try self.verifyInstOperands(inst, .{ extra.a, extra.b, .none });
291+
.shuffle_one => {
292+
const unwrapped = self.air.unwrapShuffleOne(self.zcu, inst);
293+
try self.verifyInstOperands(inst, .{ unwrapped.operand, .none, .none });
294+
},
295+
.shuffle_two => {
296+
const unwrapped = self.air.unwrapShuffleTwo(self.zcu, inst);
297+
try self.verifyInstOperands(inst, .{ unwrapped.operand_a, unwrapped.operand_b, .none });
294298
},
295299
.cmp_vector,
296300
.cmp_vector_optimized,
@@ -639,4 +643,5 @@ const log = std.log.scoped(.liveness_verify);
639643
const Air = @import("../../Air.zig");
640644
const Liveness = @import("../Liveness.zig");
641645
const InternPool = @import("../../InternPool.zig");
646+
const Zcu = @import("../../Zcu.zig");
642647
const Verify = @This();

src/Air/types_resolved.zig

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -249,12 +249,22 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
249249
if (!checkRef(extra.struct_operand, zcu)) return false;
250250
},
251251

252-
.shuffle => {
253-
const extra = air.extraData(Air.Shuffle, data.ty_pl.payload).data;
254-
if (!checkType(data.ty_pl.ty.toType(), zcu)) return false;
255-
if (!checkRef(extra.a, zcu)) return false;
256-
if (!checkRef(extra.b, zcu)) return false;
257-
if (!checkVal(Value.fromInterned(extra.mask), zcu)) return false;
252+
.shuffle_one => {
253+
const unwrapped = air.unwrapShuffleOne(zcu, inst);
254+
if (!checkType(unwrapped.result_ty, zcu)) return false;
255+
if (!checkRef(unwrapped.operand, zcu)) return false;
256+
for (unwrapped.mask) |m| switch (m.unwrap()) {
257+
.elem => {},
258+
.value => |val| if (!checkVal(.fromInterned(val), zcu)) return false,
259+
};
260+
},
261+
262+
.shuffle_two => {
263+
const unwrapped = air.unwrapShuffleTwo(zcu, inst);
264+
if (!checkType(unwrapped.result_ty, zcu)) return false;
265+
if (!checkRef(unwrapped.operand_a, zcu)) return false;
266+
if (!checkRef(unwrapped.operand_b, zcu)) return false;
267+
// No values to check because there are no comptime-known values other than undef
258268
},
259269

260270
.cmpxchg_weak,

0 commit comments

Comments
 (0)