Skip to content

Commit 98640cb

Browse files
authored
Merge pull request #23209 from jacobly0/x86_64-rewrite
x86_64: rewrite wrapping multiplication
2 parents f7e045c + 3c3a6c9 commit 98640cb

23 files changed

+31657
-27772
lines changed

lib/std/math/big/int.zig

Lines changed: 46 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,7 +1096,7 @@ pub const Mutable = struct {
10961096
/// Asserts there is enough memory to fit the result. The upper bound Limb count is
10971097
/// `a.limbs.len + (shift / (@sizeOf(Limb) * 8)) + 1`.
10981098
pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void {
1099-
llshl(r.limbs[0..], a.limbs[0..a.limbs.len], shift);
1099+
llshl(r.limbs, a.limbs, shift);
11001100
r.normalize(a.limbs.len + (shift / limb_bits) + 1);
11011101
r.positive = a.positive;
11021102
}
@@ -1165,7 +1165,7 @@ pub const Mutable = struct {
11651165

11661166
// This shift should not be able to overflow, so invoke llshl and normalize manually
11671167
// to avoid the extra required limb.
1168-
llshl(r.limbs[0..], a.limbs[0..a.limbs.len], shift);
1168+
llshl(r.limbs, a.limbs, shift);
11691169
r.normalize(a.limbs.len + (shift / limb_bits));
11701170
r.positive = a.positive;
11711171
}
@@ -1202,17 +1202,11 @@ pub const Mutable = struct {
12021202
break :nonzero a.limbs[full_limbs_shifted_out] << not_covered != 0;
12031203
};
12041204

1205-
llshr(r.limbs[0..], a.limbs[0..a.limbs.len], shift);
1205+
llshr(r.limbs, a.limbs, shift);
12061206

12071207
r.len = a.limbs.len - full_limbs_shifted_out;
12081208
r.positive = a.positive;
1209-
if (nonzero_negative_shiftout) {
1210-
if (full_limbs_shifted_out > 0) {
1211-
r.limbs[a.limbs.len - full_limbs_shifted_out] = 0;
1212-
r.len += 1;
1213-
}
1214-
r.addScalar(r.toConst(), -1);
1215-
}
1209+
if (nonzero_negative_shiftout) r.addScalar(r.toConst(), -1);
12161210
r.normalize(r.len);
12171211
}
12181212

@@ -1755,119 +1749,60 @@ pub const Mutable = struct {
17551749
y.shiftRight(y.toConst(), norm_shift);
17561750
}
17571751

1758-
/// If a is positive, this passes through to truncate.
1759-
/// If a is negative, then r is set to positive with the bit pattern ~(a - 1).
1760-
/// r may alias a.
1761-
///
1762-
/// Asserts `r` has enough storage to store the result.
1763-
/// The upper bound is `calcTwosCompLimbCount(a.len)`.
1764-
pub fn convertToTwosComplement(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
1765-
if (a.positive) {
1766-
r.truncate(a, signedness, bit_count);
1767-
return;
1768-
}
1769-
1770-
const req_limbs = calcTwosCompLimbCount(bit_count);
1771-
if (req_limbs == 0 or a.eqlZero()) {
1772-
r.set(0);
1773-
return;
1774-
}
1775-
1776-
const bit = @as(Log2Limb, @truncate(bit_count - 1));
1777-
const signmask = @as(Limb, 1) << bit;
1778-
const mask = (signmask << 1) -% 1;
1779-
1780-
r.addScalar(a.abs(), -1);
1781-
if (req_limbs > r.len) {
1782-
@memset(r.limbs[r.len..req_limbs], 0);
1783-
}
1784-
1785-
assert(r.limbs.len >= req_limbs);
1786-
r.len = req_limbs;
1787-
1788-
llnot(r.limbs[0..r.len]);
1789-
r.limbs[r.len - 1] &= mask;
1790-
r.normalize(r.len);
1791-
}
1792-
17931752
/// Truncate an integer to a number of bits, following 2s-complement semantics.
1794-
/// r may alias a.
1753+
/// `r` may alias `a`.
17951754
///
1796-
/// Asserts `r` has enough storage to store the result.
1755+
/// Asserts `r` has enough storage to compute the result.
17971756
/// The upper bound is `calcTwosCompLimbCount(bit_count)`.
17981757
pub fn truncate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
1799-
const req_limbs = calcTwosCompLimbCount(bit_count);
1800-
const abs_trunc_a: Const = .{
1801-
.positive = true,
1802-
.limbs = a.limbs[0..@min(a.limbs.len, req_limbs)],
1803-
};
1804-
18051758
// Handle 0-bit integers.
1806-
if (req_limbs == 0 or abs_trunc_a.eqlZero()) {
1759+
if (bit_count == 0) {
1760+
@branchHint(.unlikely);
18071761
r.set(0);
18081762
return;
18091763
}
18101764

1811-
const bit = @as(Log2Limb, @truncate(bit_count - 1));
1812-
const signmask = @as(Limb, 1) << bit; // 0b0..010...0 where 1 is the sign bit.
1813-
const mask = (signmask << 1) -% 1; // 0b0..01..1 where the leftmost 1 is the sign bit.
1814-
1815-
if (!a.positive) {
1816-
// Convert the integer from sign-magnitude into twos-complement.
1817-
// -x = ~(x - 1)
1818-
// Note, we simply take req_limbs * @bitSizeOf(Limb) as the
1819-
// target bit count.
1820-
1821-
r.addScalar(abs_trunc_a, -1);
1765+
const max_limbs = calcTwosCompLimbCount(bit_count);
1766+
const sign_bit = @as(Limb, 1) << @truncate(bit_count - 1);
1767+
const mask = @as(Limb, maxInt(Limb)) >> @truncate(-%bit_count);
1768+
1769+
// Guess whether the result will have the same sign as `a`.
1770+
// * If the result will be signed zero, the guess is `true`.
1771+
// * If the result will be the minimum signed integer, the guess is `false`.
1772+
// * If the result will be unsigned zero, the guess is `a.positive`.
1773+
// * Otherwise the guess is correct.
1774+
const same_sign_guess = switch (signedness) {
1775+
.signed => max_limbs > a.limbs.len or a.limbs[max_limbs - 1] & sign_bit == 0,
1776+
.unsigned => a.positive,
1777+
};
18221778

1823-
// Zero-extend the result
1824-
@memset(r.limbs[r.len..req_limbs], 0);
1825-
r.len = req_limbs;
1826-
1827-
// Without truncating, we can already peek at the sign bit of the result here.
1828-
// Note that it will be 0 if the result is negative, as we did not apply the flip here.
1829-
// If the result is negative, we have
1830-
// -(-x & mask)
1831-
// = ~(~(x - 1) & mask) + 1
1832-
// = ~(~((x - 1) | ~mask)) + 1
1833-
// = ((x - 1) | ~mask) + 1
1834-
// Note, this is only valid for the target bits and not the upper bits
1835-
// of the most significant limb. Those still need to be cleared.
1836-
// Also note that `mask` is zero for all other bits, reducing to the identity.
1837-
// This means that we still need to use & mask to clear off the upper bits.
1838-
1839-
if (signedness == .signed and r.limbs[r.len - 1] & signmask == 0) {
1840-
// Re-add the one and negate to get the result.
1841-
r.limbs[r.len - 1] &= mask;
1842-
// Note, addition cannot require extra limbs here as we did a subtraction before.
1843-
r.addScalar(r.toConst(), 1);
1844-
r.normalize(r.len);
1845-
r.positive = false;
1846-
} else {
1847-
llnot(r.limbs[0..r.len]);
1848-
r.limbs[r.len - 1] &= mask;
1849-
r.normalize(r.len);
1850-
}
1851-
} else {
1779+
const abs_trunc_a: Const = .{
1780+
.positive = true,
1781+
.limbs = a.limbs[0..llnormalize(a.limbs[0..@min(a.limbs.len, max_limbs)])],
1782+
};
1783+
if (same_sign_guess or abs_trunc_a.eqlZero()) {
1784+
// One of the following is true:
1785+
// * The result is zero.
1786+
// * The result is non-zero and has the same sign as `a`.
18521787
r.copy(abs_trunc_a);
1853-
// If the integer fits within target bits, no wrapping is required.
1854-
if (r.len < req_limbs) return;
1855-
1856-
r.limbs[r.len - 1] &= mask;
1788+
if (max_limbs <= r.len) r.limbs[max_limbs - 1] &= mask;
18571789
r.normalize(r.len);
1858-
1859-
if (signedness == .signed and r.limbs[r.len - 1] & signmask != 0) {
1860-
// Convert 2s-complement back to sign-magnitude.
1861-
// Sign-extend the upper bits so that they are inverted correctly.
1862-
r.limbs[r.len - 1] |= ~mask;
1863-
llnot(r.limbs[0..r.len]);
1864-
1865-
// Note, can only overflow if r holds 0xFFF...F which can only happen if
1866-
// a holds 0.
1867-
r.addScalar(r.toConst(), 1);
1868-
1869-
r.positive = false;
1870-
}
1790+
r.positive = a.positive or r.eqlZero();
1791+
} else {
1792+
// One of the following is true:
1793+
// * The result is the minimum signed integer.
1794+
// * The result is unsigned zero.
1795+
// * The result is non-zero and has the opposite sign as `a`.
1796+
r.addScalar(abs_trunc_a, -1);
1797+
llnot(r.limbs[0..r.len]);
1798+
@memset(r.limbs[r.len..max_limbs], maxInt(Limb));
1799+
r.limbs[max_limbs - 1] &= mask;
1800+
r.normalize(max_limbs);
1801+
r.positive = switch (signedness) {
1802+
// The only value with the sign bit still set is the minimum signed integer.
1803+
.signed => !a.positive and r.limbs[max_limbs - 1] & sign_bit == 0,
1804+
.unsigned => !a.positive or r.eqlZero(),
1805+
};
18711806
}
18721807
}
18731808

lib/std/math/big/int_test.zig

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,7 @@ test "mul large" {
10201020
// Generate a number that's large enough to cross the thresholds for the use
10211021
// of subquadratic algorithms
10221022
for (a.limbs) |*p| {
1023-
p.* = std.math.maxInt(Limb);
1023+
p.* = maxInt(Limb);
10241024
}
10251025
a.setMetadata(true, 50);
10261026

@@ -1104,7 +1104,7 @@ test "mulWrap large" {
11041104
// Generate a number that's large enough to cross the thresholds for the use
11051105
// of subquadratic algorithms
11061106
for (a.limbs) |*p| {
1107-
p.* = std.math.maxInt(Limb);
1107+
p.* = maxInt(Limb);
11081108
}
11091109
a.setMetadata(true, 50);
11101110

@@ -1961,23 +1961,78 @@ test "truncate to mutable with fewer limbs" {
19611961
.positive = undefined,
19621962
};
19631963
res.truncate(.{ .positive = true, .limbs = &.{ 0, 1 } }, .unsigned, @bitSizeOf(Limb));
1964-
try testing.expect(res.eqlZero());
1964+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
19651965
res.truncate(.{ .positive = true, .limbs = &.{ 0, 1 } }, .signed, @bitSizeOf(Limb));
1966-
try testing.expect(res.eqlZero());
1966+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
19671967
res.truncate(.{ .positive = false, .limbs = &.{ 0, 1 } }, .unsigned, @bitSizeOf(Limb));
1968-
try testing.expect(res.eqlZero());
1968+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
19691969
res.truncate(.{ .positive = false, .limbs = &.{ 0, 1 } }, .signed, @bitSizeOf(Limb));
1970-
try testing.expect(res.eqlZero());
1971-
res.truncate(.{ .positive = true, .limbs = &.{ std.math.maxInt(Limb), 1 } }, .unsigned, @bitSizeOf(Limb));
1972-
try testing.expect(res.toConst().orderAgainstScalar(std.math.maxInt(Limb)).compare(.eq));
1973-
res.truncate(.{ .positive = true, .limbs = &.{ std.math.maxInt(Limb), 1 } }, .signed, @bitSizeOf(Limb));
1970+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
1971+
res.truncate(.{ .positive = true, .limbs = &.{ maxInt(Limb), 1 } }, .unsigned, @bitSizeOf(Limb));
1972+
try testing.expect(res.toConst().orderAgainstScalar(maxInt(Limb)).compare(.eq));
1973+
res.truncate(.{ .positive = true, .limbs = &.{ maxInt(Limb), 1 } }, .signed, @bitSizeOf(Limb));
19741974
try testing.expect(res.toConst().orderAgainstScalar(-1).compare(.eq));
1975-
res.truncate(.{ .positive = false, .limbs = &.{ std.math.maxInt(Limb), 1 } }, .unsigned, @bitSizeOf(Limb));
1975+
res.truncate(.{ .positive = false, .limbs = &.{ maxInt(Limb), 1 } }, .unsigned, @bitSizeOf(Limb));
19761976
try testing.expect(res.toConst().orderAgainstScalar(1).compare(.eq));
1977-
res.truncate(.{ .positive = false, .limbs = &.{ std.math.maxInt(Limb), 1 } }, .signed, @bitSizeOf(Limb));
1977+
res.truncate(.{ .positive = false, .limbs = &.{ maxInt(Limb), 1 } }, .signed, @bitSizeOf(Limb));
19781978
try testing.expect(res.toConst().orderAgainstScalar(1).compare(.eq));
19791979
}
19801980

1981+
test "truncate value that normalizes after being masked" {
1982+
var res_limbs: [2]Limb = undefined;
1983+
var res: Mutable = .{
1984+
.limbs = &res_limbs,
1985+
.len = undefined,
1986+
.positive = undefined,
1987+
};
1988+
res.truncate(.{ .positive = true, .limbs = &.{ 0, 2 } }, .signed, 1 + @bitSizeOf(Limb));
1989+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
1990+
res.truncate(.{ .positive = true, .limbs = &.{ 1, 2 } }, .signed, 1 + @bitSizeOf(Limb));
1991+
try testing.expect(res.toConst().orderAgainstScalar(1).compare(.eq));
1992+
}
1993+
1994+
test "truncate to zero" {
1995+
var res_limbs: [1]Limb = undefined;
1996+
var res: Mutable = .{
1997+
.limbs = &res_limbs,
1998+
.len = undefined,
1999+
.positive = undefined,
2000+
};
2001+
res.truncate(.{ .positive = true, .limbs = &.{0} }, .signed, @bitSizeOf(Limb));
2002+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2003+
res.truncate(.{ .positive = false, .limbs = &.{0} }, .signed, @bitSizeOf(Limb));
2004+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2005+
res.truncate(.{ .positive = true, .limbs = &.{0} }, .unsigned, @bitSizeOf(Limb));
2006+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2007+
res.truncate(.{ .positive = false, .limbs = &.{0} }, .unsigned, @bitSizeOf(Limb));
2008+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2009+
res.truncate(.{ .positive = true, .limbs = &.{ 0, 1 } }, .signed, @bitSizeOf(Limb));
2010+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2011+
res.truncate(.{ .positive = false, .limbs = &.{ 0, 1 } }, .signed, @bitSizeOf(Limb));
2012+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2013+
res.truncate(.{ .positive = true, .limbs = &.{ 0, 1 } }, .unsigned, @bitSizeOf(Limb));
2014+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2015+
res.truncate(.{ .positive = false, .limbs = &.{ 0, 1 } }, .unsigned, @bitSizeOf(Limb));
2016+
try testing.expect(res.positive and res.len == 1 and res.limbs[0] == 0);
2017+
}
2018+
2019+
test "truncate to minimum signed integer" {
2020+
var res_limbs: [1]Limb = undefined;
2021+
var res: Mutable = .{
2022+
.limbs = &res_limbs,
2023+
.len = undefined,
2024+
.positive = undefined,
2025+
};
2026+
res.truncate(.{ .positive = true, .limbs = &.{1 << @bitSizeOf(Limb) - 1} }, .signed, @bitSizeOf(Limb));
2027+
try testing.expect(res.toConst().orderAgainstScalar(-1 << @bitSizeOf(Limb) - 1).compare(.eq));
2028+
res.truncate(.{ .positive = false, .limbs = &.{1 << @bitSizeOf(Limb) - 1} }, .signed, @bitSizeOf(Limb));
2029+
try testing.expect(res.toConst().orderAgainstScalar(-1 << @bitSizeOf(Limb) - 1).compare(.eq));
2030+
res.truncate(.{ .positive = true, .limbs = &.{1 << @bitSizeOf(Limb) - 1} }, .unsigned, @bitSizeOf(Limb));
2031+
try testing.expect(res.toConst().orderAgainstScalar(1 << @bitSizeOf(Limb) - 1).compare(.eq));
2032+
res.truncate(.{ .positive = false, .limbs = &.{1 << @bitSizeOf(Limb) - 1} }, .unsigned, @bitSizeOf(Limb));
2033+
try testing.expect(res.toConst().orderAgainstScalar(1 << @bitSizeOf(Limb) - 1).compare(.eq));
2034+
}
2035+
19812036
test "saturate single signed positive" {
19822037
var a = try Managed.initSet(testing.allocator, 0xBBBB_BBBB);
19832038
defer a.deinit();
@@ -2136,6 +2191,15 @@ test "shift-right negative" {
21362191
a.setSign(true);
21372192
try a.shiftRight(&arg7, 4);
21382193
try testing.expect(try a.toInt(i16) == -2048);
2194+
2195+
var arg8_limbs: [1]Limb = undefined;
2196+
var arg8: Mutable = .{
2197+
.limbs = &arg8_limbs,
2198+
.len = undefined,
2199+
.positive = undefined,
2200+
};
2201+
arg8.shiftRight(.{ .limbs = &.{ 1, 1 }, .positive = false }, @bitSizeOf(Limb));
2202+
try testing.expect(arg8.toConst().orderAgainstScalar(-2).compare(.eq));
21392203
}
21402204

21412205
test "sat shift-left simple unsigned" {

lib/std/zig/Zir.zig

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2136,7 +2136,7 @@ pub const Inst = struct {
21362136
ref_start_index = static_len,
21372137
_,
21382138

2139-
pub const static_len = 93;
2139+
pub const static_len = 97;
21402140

21412141
pub fn toRef(i: Index) Inst.Ref {
21422142
return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i));
@@ -2221,6 +2221,10 @@ pub const Inst = struct {
22212221
slice_const_u8_sentinel_0_type,
22222222
vector_16_i8_type,
22232223
vector_32_i8_type,
2224+
vector_1_u8_type,
2225+
vector_2_u8_type,
2226+
vector_4_u8_type,
2227+
vector_8_u8_type,
22242228
vector_16_u8_type,
22252229
vector_32_u8_type,
22262230
vector_8_i16_type,

src/Air.zig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,10 @@ pub const Inst = struct {
985985
slice_const_u8_sentinel_0_type = @intFromEnum(InternPool.Index.slice_const_u8_sentinel_0_type),
986986
vector_16_i8_type = @intFromEnum(InternPool.Index.vector_16_i8_type),
987987
vector_32_i8_type = @intFromEnum(InternPool.Index.vector_32_i8_type),
988+
vector_1_u8_type = @intFromEnum(InternPool.Index.vector_1_u8_type),
989+
vector_2_u8_type = @intFromEnum(InternPool.Index.vector_2_u8_type),
990+
vector_4_u8_type = @intFromEnum(InternPool.Index.vector_4_u8_type),
991+
vector_8_u8_type = @intFromEnum(InternPool.Index.vector_8_u8_type),
988992
vector_16_u8_type = @intFromEnum(InternPool.Index.vector_16_u8_type),
989993
vector_32_u8_type = @intFromEnum(InternPool.Index.vector_32_u8_type),
990994
vector_8_i16_type = @intFromEnum(InternPool.Index.vector_8_i16_type),

0 commit comments

Comments
 (0)