Skip to content

Commit 2bc86e1

Browse files
committed
Generalize bit manipulation to larger integers than UInt128
This is useful for other packages that may want to hook into this machinery, e.g. Kmers
1 parent d6f2b61 commit 2bc86e1

File tree

1 file changed

+43
-19
lines changed

1 file changed

+43
-19
lines changed

src/bit-manipulation/bit-manipulation.jl

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,59 @@
1-
const BitUnsigned = Union{UInt8, UInt16, UInt32, UInt64, UInt128}
1+
# Built-in unsigned integer types of at most 64 bits. Used to constrain the masked
# `reversebits` methods for 2-bit and 4-bit symbols.
const BitUnsigned = Union{UInt8, UInt16, UInt32, UInt64}
2+
3+
# With one symbol per byte, reversing the symbol order is a byte swap.
@inline function reversebits(x::Unsigned, ::BitsPerSymbol{8})
    return bswap(x)
end
24

35
# Reverse the order of the 2-bit symbols in `x`: swap the two symbols inside every
# 4-bit group here, then let the 4-bit method finish the reversal.
@inline function reversebits(x::T, ::BitsPerSymbol{2}) where {T<:BitUnsigned}
    # 0b0011 repeated, truncated to the width of T: selects the low symbol of each nibble.
    pairmask = 0x3333333333333333 % T
    movedown = (x >> 2) & pairmask
    moveup = (x & pairmask) << 2
    return reversebits(movedown | moveup, BitsPerSymbol{4}())
end
810

911
# Reverse the order of the 4-bit symbols in `x`: swap the two nibbles of every byte
# here, then let the 8-bit method reverse the bytes.
@inline function reversebits(x::T, ::BitsPerSymbol{4}) where {T<:BitUnsigned}
    # 0x0F repeated, truncated to the width of T: selects the low nibble of each byte.
    nibblemask = 0x0F0F0F0F0F0F0F0F % T
    movedown = (x >> 4) & nibblemask
    moveup = (x & nibblemask) << 4
    return reversebits(movedown | moveup, BitsPerSymbol{8}())
end
1416

15-
@inline reversebits(x::T, ::BitsPerSymbol{8}) where T <: BitUnsigned = bswap(x)
16-
1717
# A UInt16 holds a single 16-bit symbol, so there is nothing to reverse.
@inline function reversebits(x::UInt16, ::BitsPerSymbol{16})
    return x
end
1818
# Reverse the order of the 16-bit symbols in `x`: swap the two symbols inside every
# 32-bit group here, then let the 32-bit method finish the reversal.
@inline function reversebits(x::T, ::BitsPerSymbol{16}) where {T<:Union{UInt32,UInt64}}
    # 0x0000FFFF repeated, truncated to the width of T.
    halfmask = 0x0000FFFF0000FFFF % T
    movedown = (x >> 16) & halfmask
    moveup = (x & halfmask) << 16
    return reversebits(movedown | moveup, BitsPerSymbol{32}())
end
2323

2424
# A UInt32 holds a single 32-bit symbol, so there is nothing to reverse.
@inline function reversebits(x::UInt32, ::BitsPerSymbol{32})
    return x
end
25-
@inline function reversebits(x::T, ::BitsPerSymbol{32}) where T <: Union{UInt64}
26-
mask = 0x00000000FFFFFFF00000000FFFFFFFF % T
25+
# Reverse the order of the two 32-bit symbols in a UInt64 by swapping its halves,
# then hand the result to the 64-bit method.
@inline function reversebits(x::UInt64, ::BitsPerSymbol{32})
    # The mask must select exactly the low 32 bits, so the literal needs eight F digits.
    # A shorter literal (e.g. seven F's) would silently drop the top bits of each half.
    mask = 0x00000000FFFFFFFF
    x = ((x >> 32) & mask) | ((x & mask) << 32)
    return reversebits(x, BitsPerSymbol{64}())
end
3030

3131
# A UInt64 holds a single 64-bit symbol, so there is nothing to reverse.
@inline function reversebits(x::UInt64, ::BitsPerSymbol{64})
    return x
end
3232

33+
# Generic method for large integers, or odd-sized integers.
#
# Integers narrower than 8 bytes are widened to UInt64, reversed there, and the result
# is shifted back down and truncated. Wider integers are processed one 64-bit chunk at
# a time, each reversed chunk being written to the mirrored position in the result.
#
# Throws an error if `x` is at least 8 bytes wide and its size is not a multiple of
# 8 bytes.
#
# NOTE(review): if `x` is a UInt64 and no more specific method exists for `b`, the
# chunked branch dispatches back to this same method. Confirm that every supported
# BitsPerSymbol has a dedicated UInt64 method.
@inline function reversebits(x::Unsigned, b::BitsPerSymbol)
    T = typeof(x)
    nbits = 8 * sizeof(x)
    if sizeof(x) < 8
        # If smaller than UInt64, we convert to UInt64, reverse, and then
        # shift down and truncate: after the reversal the bits that came from `x`
        # sit in the top `nbits` bits of the UInt64.
        u = reversebits(x % UInt64, b)
        u >>= 64 - nbits
        return u % T
    else
        # Else, we reverse 64 bits at a time
        (d, r) = divrem(sizeof(x), 8)
        iszero(r) || error("Can only work with Unsigned types a multiple of 8 bytes, or smaller than UInt64")
        u = zero(T)
        for i in 0:(d - 1)
            # Both shift amounts lie in 0:(nbits - 64), so they are always valid as-is.
            # They are deliberately not masked with `nbits - 1`: that mask only leaves
            # them unchanged when `nbits` is a power of two, and would corrupt the
            # result for e.g. a 24-byte integer.
            shift = i * 64
            rv = reversebits((x >> shift) % UInt64, b)
            u |= (rv % T) << (nbits - 64 - shift)
        end
        return u
    end
end
55+
56+
3357
# Bit-parallel complement for 2-bit nucleic acid alphabets: the bitwise NOT of `x`.
@inline complement_bitpar(x::Unsigned, ::T) where {T<:NucleicAcidAlphabet{2}} = ~x
@@ -68,17 +92,17 @@ end
6892
@inline sum(map(i -> f(i.value), z))
6993
end
7094

71-
pattern(::BitsPerSymbol{1}) = typemax(UInt128)
72-
pattern(::BitsPerSymbol{2}) = 0x55555555555555555555555555555555
73-
pattern(::BitsPerSymbol{4}) = 0x11111111111111111111111111111111
74-
pattern(::BitsPerSymbol{8}) = 0x01010101010101010101010101010101
75-
pattern(::BitsPerSymbol{16}) = 0x00010001000100010001000100010001
76-
pattern(::BitsPerSymbol{32}) = 0x00000001000000010000000100000001
77-
pattern(::BitsPerSymbol{64}) = 0x00000000000000010000000000000001
78-
pattern(::BitsPerSymbol{128}) = 0x00000000000000000000000000000001
95+
# Bit pattern with the lowest bit of every B-bit symbol set. The value is a UInt8 for
# symbols of up to 8 bits, and otherwise an unsigned integer holding exactly one symbol.
# NOTE(review): presumably tiled across wider integers by `repeatpattern` — confirm
# against its definition.
pattern(::BitsPerSymbol{1}) = typemax(UInt8)
pattern(::BitsPerSymbol{2}) = 0b01010101
pattern(::BitsPerSymbol{4}) = 0b00010001
pattern(::BitsPerSymbol{8}) = one(UInt8)
pattern(::BitsPerSymbol{16}) = one(UInt16)
pattern(::BitsPerSymbol{32}) = one(UInt32)
pattern(::BitsPerSymbol{64}) = one(UInt64)
79102

103+
# Count the number of times the B lower bits of `encoding` are present in `chunk`
80104
function count_encoding(chunk::T, encoding::T, b::BitsPerSymbol{B}) where {T <: Unsigned, B}
81-
pat = pattern(b) % typeof(encoding)
105+
pat = repeatpattern(typeof(encoding), pattern(b))
82106
u = chunk (encoding * pat)
83107
for i in 1:trailing_zeros(B)
84108
shift = (1 << (i - 1)) & (8*sizeof(T) - 1)

0 commit comments

Comments
 (0)