Skip to content

Commit ec579aa

Browse files
jacobly0 authored and mlugg committed
Legalize: implement scalarization of @shuffle
1 parent add2976 commit ec579aa

File tree

11 files changed

+328
-138
lines changed

11 files changed

+328
-138
lines changed

lib/std/Target.zig

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,11 +1246,7 @@ pub const Cpu = struct {
12461246

12471247
/// Adds the specified feature set but not its dependencies.
12481248
pub fn addFeatureSet(set: *Set, other_set: Set) void {
1249-
if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) {
1250-
for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* |= other_set_int;
1251-
} else {
1252-
set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints);
1253-
}
1249+
set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints);
12541250
}
12551251

12561252
/// Removes the specified feature but not its dependents.
@@ -1262,11 +1258,7 @@ pub const Cpu = struct {
12621258

12631259
/// Removes the specified feature but not its dependents.
12641260
pub fn removeFeatureSet(set: *Set, other_set: Set) void {
1265-
if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) {
1266-
for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* &= ~other_set_int;
1267-
} else {
1268-
set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints);
1269-
}
1261+
set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints);
12701262
}
12711263

12721264
pub fn populateDependencies(set: *Set, all_features_list: []const Cpu.Feature) void {
@@ -1295,17 +1287,10 @@ pub const Cpu = struct {
12951287
}
12961288

12971289
pub fn isSuperSetOf(set: Set, other_set: Set) bool {
1298-
if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) {
1299-
var result = true;
1300-
for (&set.ints, other_set.ints) |*set_int, other_set_int|
1301-
result = result and (set_int.* & other_set_int) == other_set_int;
1302-
return result;
1303-
} else {
1304-
const V = @Vector(usize_count, usize);
1305-
const set_v: V = set.ints;
1306-
const other_v: V = other_set.ints;
1307-
return @reduce(.And, (set_v & other_v) == other_v);
1308-
}
1290+
const V = @Vector(usize_count, usize);
1291+
const set_v: V = set.ints;
1292+
const other_v: V = other_set.ints;
1293+
return @reduce(.And, (set_v & other_v) == other_v);
13091294
}
13101295
};
13111296

lib/std/array_hash_map.zig

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -889,19 +889,10 @@ pub fn ArrayHashMapUnmanaged(
889889
self.pointer_stability.lock();
890890
defer self.pointer_stability.unlock();
891891

892-
if (new_capacity <= linear_scan_max) {
893-
try self.entries.ensureTotalCapacity(gpa, new_capacity);
894-
return;
895-
}
896-
897-
if (self.index_header) |header| {
898-
if (new_capacity <= header.capacity()) {
899-
try self.entries.ensureTotalCapacity(gpa, new_capacity);
900-
return;
901-
}
902-
}
903-
904892
try self.entries.ensureTotalCapacity(gpa, new_capacity);
893+
if (new_capacity <= linear_scan_max) return;
894+
if (self.index_header) |header| if (new_capacity <= header.capacity()) return;
895+
905896
const new_bit_index = try IndexHeader.findBitIndex(new_capacity);
906897
const new_header = try IndexHeader.alloc(gpa, new_bit_index);
907898

@@ -2116,7 +2107,7 @@ const IndexHeader = struct {
21162107

21172108
fn findBitIndex(desired_capacity: usize) Allocator.Error!u8 {
21182109
if (desired_capacity > max_capacity) return error.OutOfMemory;
2119-
var new_bit_index = @as(u8, @intCast(std.math.log2_int_ceil(usize, desired_capacity)));
2110+
var new_bit_index: u8 = @intCast(std.math.log2_int_ceil(usize, desired_capacity));
21202111
if (desired_capacity > index_capacities[new_bit_index]) new_bit_index += 1;
21212112
if (new_bit_index < min_bit_index) new_bit_index = min_bit_index;
21222113
assert(desired_capacity <= index_capacities[new_bit_index]);

lib/std/crypto/chacha20.zig

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -499,15 +499,12 @@ fn ChaChaNonVecImpl(comptime rounds_nb: usize) type {
499499
fn ChaChaImpl(comptime rounds_nb: usize) type {
500500
switch (builtin.cpu.arch) {
501501
.x86_64 => {
502-
const has_avx2 = std.Target.x86.featureSetHas(builtin.cpu.features, .avx2);
503-
const has_avx512f = std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
504-
if (builtin.zig_backend != .stage2_x86_64 and has_avx512f) return ChaChaVecImpl(rounds_nb, 4);
505-
if (has_avx2) return ChaChaVecImpl(rounds_nb, 2);
502+
if (builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f)) return ChaChaVecImpl(rounds_nb, 4);
503+
if (std.Target.x86.featureSetHas(builtin.cpu.features, .avx2)) return ChaChaVecImpl(rounds_nb, 2);
506504
return ChaChaVecImpl(rounds_nb, 1);
507505
},
508506
.aarch64 => {
509-
const has_neon = std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon);
510-
if (has_neon) return ChaChaVecImpl(rounds_nb, 4);
507+
if (builtin.zig_backend != .stage2_aarch64 and std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon)) return ChaChaVecImpl(rounds_nb, 4);
511508
return ChaChaNonVecImpl(rounds_nb);
512509
},
513510
else => return ChaChaNonVecImpl(rounds_nb),

lib/std/hash/xxhash.zig

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,6 @@ fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64)
780780
}
781781

782782
test "xxhash3" {
783-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
784783
if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
785784

786785
const H = XxHash3;
@@ -814,7 +813,6 @@ test "xxhash3" {
814813
}
815814

816815
test "xxhash3 smhasher" {
817-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
818816
if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
819817

820818
const Test = struct {
@@ -828,7 +826,6 @@ test "xxhash3 smhasher" {
828826
}
829827

830828
test "xxhash3 iterative api" {
831-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
832829
if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
833830

834831
const Test = struct {

lib/std/simd.zig

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,6 @@ pub fn extract(
231231
}
232232

233233
test "vector patterns" {
234-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
235-
236234
const base = @Vector(4, u32){ 10, 20, 30, 40 };
237235
const other_base = @Vector(4, u32){ 55, 66, 77, 88 };
238236

@@ -302,8 +300,6 @@ pub fn reverseOrder(vec: anytype) @TypeOf(vec) {
302300
}
303301

304302
test "vector shifting" {
305-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
306-
307303
const base = @Vector(4, u32){ 10, 20, 30, 40 };
308304

309305
try std.testing.expectEqual([4]u32{ 30, 40, 999, 999 }, shiftElementsLeft(base, 2, 999));

src/Air.zig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -704,15 +704,15 @@ pub const Inst = struct {
704704
/// Uses the `ty_pl` field, where the payload index points to:
705705
/// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty`
706706
/// 2. operand: Ref // guaranteed not to be an interned value
707-
/// See `unwrapShufleOne`.
707+
/// See `unwrapShuffleOne`.
708708
shuffle_one,
709709
/// Constructs a vector by selecting elements from two vectors based on a mask. Each mask
710710
/// element is either an index into one of the vectors, or "undef".
711711
/// Uses the `ty_pl` field, where the payload index points to:
712712
/// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty`
713713
/// 2. operand_a: Ref // guaranteed not to be an interned value
714714
/// 3. operand_b: Ref // guaranteed not to be an interned value
715-
/// See `unwrapShufleTwo`.
715+
/// See `unwrapShuffleTwo`.
716716
shuffle_two,
717717
/// Constructs a vector element-wise from `a` or `b` based on `pred`.
718718
/// Uses the `pl_op` field with `pred` as operand, and payload `Bin`.

0 commit comments

Comments
 (0)