|
71 | 71 | a.data, b.data, |
72 | 72 | ) |> uint8x16 |
73 | 73 |
|
# Raw LLVM-compatible payload for a 128-bit vector of four 32-bit lanes
# (lowered as `<4 x i32>` by the llvmcall below).
const uint32x4_lvec = NTuple{4, VecElement{UInt32}}

# Wrapper struct so methods (convert, xor, ...) can be defined on the vector
# type without pirating methods on the raw NTuple.
struct uint32x4
    data::uint32x4_lvec  # four 32-bit lanes; lane order is set by the Vararg constructor
end
# Reinterpret the 128 bits of one isbits value as another type.
# `Ref(x)` heap-boxes the value so we can take its address; `GC.@preserve`
# keeps that box alive across the `unsafe_load` — without it the temporary
# Ref may be collected between `pointer_from_objref` and the load, which is
# undefined behavior per the Julia unsafe-pointer documentation.
@inline function _uint32x4_bitcast(::Type{T}, x)::T where {T}
    box = Ref(x)
    GC.@preserve box unsafe_load(Ptr{T}(pointer_from_objref(box)))
end

# Lossless 128-bit reinterpretations between uint32x4 and the sibling vector types.
@inline Base.convert(::Type{uint64x2}, x::uint32x4) = _uint32x4_bitcast(uint64x2, x)
@inline Base.convert(::Type{uint32x4}, x::uint64x2) = _uint32x4_bitcast(uint32x4, x)
@inline uint32x4(x::uint64x2) = convert(uint32x4, x)
@inline uint64x2(x::uint32x4) = convert(uint64x2, x)
@inline Base.convert(::Type{uint8x16}, x::uint32x4) = _uint32x4_bitcast(uint8x16, x)
@inline Base.convert(::Type{uint32x4}, x::uint8x16) = _uint32x4_bitcast(uint32x4, x)
@inline uint32x4(x::uint8x16) = convert(uint32x4, x)
@inline uint8x16(x::uint32x4) = convert(uint8x16, x)
# Conversions to/from the 128-bit scalar integer, and via it any integer type.
@inline Base.convert(::Type{uint32x4}, x::UInt128) = _uint32x4_bitcast(uint32x4, x)
@inline Base.convert(::Type{UInt128}, x::uint32x4) = _uint32x4_bitcast(UInt128, x)
@inline UInt128(x::uint32x4) = convert(UInt128, x)
@inline uint32x4(x::UInt128) = convert(uint32x4, x)
@inline Base.convert(::Type{uint32x4}, x::Union{Signed, Unsigned}) = convert(uint32x4, UInt128(x))
@inline Base.convert(::Type{T}, x::uint32x4) where T <: Union{Signed, Unsigned} = convert(T, UInt128(x))
| 92 | + |
# Construct a uint32x4 from four 32-bit words.
# On little-endian hosts the words are stored in reverse order.
# NOTE(review): presumably this matches the lane convention of the sibling
# uint8x16/uint64x2 constructors — confirm against those definitions.
@inline function uint32x4(words::Vararg{UInt32, 4})
    ordered = LITTLE_ENDIAN ? reverse(words) : words
    lanes = map(VecElement, ordered)::uint32x4_lvec
    return uint32x4(lanes)
end
| 101 | + |
# Additive identity: all four lanes zero (routed through the UInt128 convert path).
@inline Base.zero(::Type{uint32x4}) = convert(uint32x4, 0)

# Lane-wise XOR, emitted as a single LLVM `xor <4 x i32>` on the raw payload;
# the NTuple result is rewrapped into uint32x4 via the |> at the end.
@inline Base.xor(a::uint32x4, b::uint32x4) = llvmcall(
    """%3 = xor <4 x i32> %1, %0
    ret <4 x i32> %3""",
    uint32x4_lvec, Tuple{uint32x4_lvec, uint32x4_lvec},
    a.data, b.data,
) |> uint32x4
| 109 | + |
# Raw NEON intrinsics, provided by FEAT_AES
75 | 111 | @inline _vaese(a::uint8x16, b::uint8x16) = ccall( |
76 | 112 | "llvm.aarch64.crypto.aese", |
|
126 | 162 | end |
# Key-expansion assist: run one AESE round with an all-zero round key
# (AddRoundKey with zero is a no-op, leaving SubBytes+ShiftRows) followed by
# the shuffle helper, then XOR in the round constant R.
# NOTE(review): appears to mirror x86 AESKEYGENASSIST semantics — confirm
# against the x86 code path.
@inline function _aes_key_gen_assist(a::uint64x2, ::Val{R}) where {R}
    res = _aes_key_gen_shuffle_helper(_vaese(uint8x16(a), zero(uint8x16)))
    # R is injected as a 32-bit word paired with a zero word in each half,
    # rather than widened to 64 bits — so only one 32-bit lane per half is
    # XORed with the constant instead of both.
    r = R % UInt32
    z = zero(UInt32)
    return uint64x2(res) ⊻ uint64x2(uint32x4(r, z, r, z))
end
132 | 169 |
|
133 | 170 | """ |
|
0 commit comments