
Commit 8c8e404

JerryShih authored and palmer-dabbelt committed
crypto: riscv - add vector crypto accelerated SHA-{256,224}
Add an implementation of SHA-256 and SHA-224 using the Zvknha or Zvknhb extension. The assembly code is derived from OpenSSL code (openssl/openssl#21923) that was dual-licensed so that it could be reused in the kernel. Nevertheless, the assembly has been significantly reworked for integration with the kernel, for example by using a regular .S file instead of the so-called perlasm, using the assembler instead of bare '.inst', and greatly reducing code duplication.

Co-developed-by: Charalampos Mitrodimas <[email protected]>
Signed-off-by: Charalampos Mitrodimas <[email protected]>
Co-developed-by: Heiko Stuebner <[email protected]>
Signed-off-by: Heiko Stuebner <[email protected]>
Co-developed-by: Phoebe Chen <[email protected]>
Signed-off-by: Phoebe Chen <[email protected]>
Signed-off-by: Jerry Shih <[email protected]>
Co-developed-by: Eric Biggers <[email protected]>
Signed-off-by: Eric Biggers <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 600a385 commit 8c8e404

4 files changed: 376 additions, 0 deletions

arch/riscv/crypto/Kconfig

Lines changed: 11 additions & 0 deletions
@@ -39,4 +39,15 @@ config CRYPTO_GHASH_RISCV64
 	  Architecture: riscv64 using:
 	  - Zvkg vector crypto extension
 
+config CRYPTO_SHA256_RISCV64
+	tristate "Hash functions: SHA-224 and SHA-256"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_SHA256
+	help
+	  SHA-224 and SHA-256 secure hash algorithm (FIPS 180)
+
+	  Architecture: riscv64 using:
+	  - Zvknha or Zvknhb vector crypto extensions
+	  - Zvkb vector crypto extension
+
 endmenu
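For reference, a minimal .config fragment that enables the new driver on a riscv64 kernel (a sketch; 64BIT is implied by the architecture, and TOOLCHAIN_HAS_VECTOR_CRYPTO is derived from assembler support rather than set by hand):

	CONFIG_RISCV_ISA_V=y
	CONFIG_CRYPTO_SHA256_RISCV64=m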

arch/riscv/crypto/Makefile

Lines changed: 3 additions & 0 deletions
@@ -9,3 +9,6 @@ chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
 
 obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
 ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
+
+obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
+sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
arch/riscv/crypto/sha256-riscv64-glue.c

Lines changed: 137 additions & 0 deletions

@@ -0,0 +1,137 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SHA-256 and SHA-224 using the RISC-V vector crypto extensions
 *
 * Copyright (C) 2022 VRULL GmbH
 * Author: Heiko Stuebner <[email protected]>
 *
 * Copyright (C) 2023 SiFive, Inc.
 * Author: Jerry Shih <[email protected]>
 */

#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha256_base.h>
#include <linux/linkage.h>
#include <linux/module.h>

/*
 * Note: the asm function only uses the 'state' field of struct sha256_state.
 * It is assumed to be the first field.
 */
asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
	struct sha256_state *state, const u8 *data, int num_blocks);

static int riscv64_sha256_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len)
{
	/*
	 * Ensure struct sha256_state begins directly with the SHA-256
	 * 256-bit internal state, as this is what the asm function expects.
	 */
	BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);

	if (crypto_simd_usable()) {
		kernel_vector_begin();
		sha256_base_do_update(desc, data, len,
				      sha256_transform_zvknha_or_zvknhb_zvkb);
		kernel_vector_end();
	} else {
		crypto_sha256_update(desc, data, len);
	}
	return 0;
}

static int riscv64_sha256_finup(struct shash_desc *desc, const u8 *data,
				unsigned int len, u8 *out)
{
	if (crypto_simd_usable()) {
		kernel_vector_begin();
		if (len)
			sha256_base_do_update(
				desc, data, len,
				sha256_transform_zvknha_or_zvknhb_zvkb);
		sha256_base_do_finalize(
			desc, sha256_transform_zvknha_or_zvknhb_zvkb);
		kernel_vector_end();

		return sha256_base_finish(desc, out);
	}

	return crypto_sha256_finup(desc, data, len, out);
}

static int riscv64_sha256_final(struct shash_desc *desc, u8 *out)
{
	return riscv64_sha256_finup(desc, NULL, 0, out);
}

static int riscv64_sha256_digest(struct shash_desc *desc, const u8 *data,
				 unsigned int len, u8 *out)
{
	return sha256_base_init(desc) ?:
	       riscv64_sha256_finup(desc, data, len, out);
}

static struct shash_alg riscv64_sha256_algs[] = {
	{
		.init = sha256_base_init,
		.update = riscv64_sha256_update,
		.final = riscv64_sha256_final,
		.finup = riscv64_sha256_finup,
		.digest = riscv64_sha256_digest,
		.descsize = sizeof(struct sha256_state),
		.digestsize = SHA256_DIGEST_SIZE,
		.base = {
			.cra_blocksize = SHA256_BLOCK_SIZE,
			.cra_priority = 300,
			.cra_name = "sha256",
			.cra_driver_name = "sha256-riscv64-zvknha_or_zvknhb-zvkb",
			.cra_module = THIS_MODULE,
		},
	}, {
		.init = sha224_base_init,
		.update = riscv64_sha256_update,
		.final = riscv64_sha256_final,
		.finup = riscv64_sha256_finup,
		.descsize = sizeof(struct sha256_state),
		.digestsize = SHA224_DIGEST_SIZE,
		.base = {
			.cra_blocksize = SHA224_BLOCK_SIZE,
			.cra_priority = 300,
			.cra_name = "sha224",
			.cra_driver_name = "sha224-riscv64-zvknha_or_zvknhb-zvkb",
			.cra_module = THIS_MODULE,
		},
	},
};

static int __init riscv64_sha256_mod_init(void)
{
	/* Both Zvknha and Zvknhb provide the SHA-256 instructions. */
	if ((riscv_isa_extension_available(NULL, ZVKNHA) ||
	     riscv_isa_extension_available(NULL, ZVKNHB)) &&
	    riscv_isa_extension_available(NULL, ZVKB) &&
	    riscv_vector_vlen() >= 128)
		return crypto_register_shashes(riscv64_sha256_algs,
					       ARRAY_SIZE(riscv64_sha256_algs));

	return -ENODEV;
}

static void __exit riscv64_sha256_mod_exit(void)
{
	crypto_unregister_shashes(riscv64_sha256_algs,
				  ARRAY_SIZE(riscv64_sha256_algs));
}

module_init(riscv64_sha256_mod_init);
module_exit(riscv64_sha256_mod_exit);

MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
MODULE_AUTHOR("Heiko Stuebner <[email protected]>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("sha256");
MODULE_ALIAS_CRYPTO("sha224");
arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S

Lines changed: 225 additions & 0 deletions

@@ -0,0 +1,225 @@
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <[email protected]>
// Copyright (c) 2023, Phoebe Chen <[email protected]>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')

#include <linux/cfi_types.h>

.text
.option arch, +zvknha, +zvkb

#define STATEP		a0
#define DATA		a1
#define NUM_BLOCKS	a2

#define STATEP_C	a3

#define MASK		v0
#define INDICES		v1
#define W0		v2
#define W1		v3
#define W2		v4
#define W3		v5
#define VTMP		v6
#define FEBA		v7
#define HGDC		v8
#define K0		v10
#define K1		v11
#define K2		v12
#define K3		v13
#define K4		v14
#define K5		v15
#define K6		v16
#define K7		v17
#define K8		v18
#define K9		v19
#define K10		v20
#define K11		v21
#define K12		v22
#define K13		v23
#define K14		v24
#define K15		v25
#define PREV_FEBA	v26
#define PREV_HGDC	v27

// Do 4 rounds of SHA-256.  w0 contains the current 4 message schedule words.
//
// If not all the message schedule words have been computed yet, then this also
// computes 4 more message schedule words.  w1-w3 contain the next 3 groups of
// 4 message schedule words; this macro computes the group after w3 and writes
// it to w0.  This means that the next (w0, w1, w2, w3) is the current (w1, w2,
// w3, w0), so the caller must cycle through the registers accordingly.
.macro	sha256_4rounds	last, k, w0, w1, w2, w3
	vadd.vv		VTMP, \k, \w0
	vsha2cl.vv	HGDC, FEBA, VTMP
	vsha2ch.vv	FEBA, HGDC, VTMP
.if !\last
	vmerge.vvm	VTMP, \w2, \w1, MASK
	vsha2ms.vv	\w0, VTMP, \w3
.endif
.endm

.macro	sha256_16rounds	last, k0, k1, k2, k3
	sha256_4rounds	\last, \k0, W0, W1, W2, W3
	sha256_4rounds	\last, \k1, W1, W2, W3, W0
	sha256_4rounds	\last, \k2, W2, W3, W0, W1
	sha256_4rounds	\last, \k3, W3, W0, W1, W2
.endm
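// For orientation: the rolling (W0, W1, W2, W3) scheme above implements the
// standard FIPS 180-4 message schedule four words at a time.  A scalar C
// sketch of the recurrence that the vmerge + vsha2ms pair computes
// (sigma0/sigma1 are illustrative helper names, not symbols in this file):
//
//	for (i = t; i < t + 4; i++)
//		w[i] = sigma1(w[i-2]) + w[i-7] + sigma0(w[i-15]) + w[i-16];
//
// with sigma0(x) = ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)
// and  sigma1(x) = ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10).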
// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data,
//					       int num_blocks);
SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)

	// Load the round constants into K0-K15.
	vsetivli	zero, 4, e32, m1, ta, ma
	la		t0, K256
	vle32.v		K0, (t0)
	addi		t0, t0, 16
	vle32.v		K1, (t0)
	addi		t0, t0, 16
	vle32.v		K2, (t0)
	addi		t0, t0, 16
	vle32.v		K3, (t0)
	addi		t0, t0, 16
	vle32.v		K4, (t0)
	addi		t0, t0, 16
	vle32.v		K5, (t0)
	addi		t0, t0, 16
	vle32.v		K6, (t0)
	addi		t0, t0, 16
	vle32.v		K7, (t0)
	addi		t0, t0, 16
	vle32.v		K8, (t0)
	addi		t0, t0, 16
	vle32.v		K9, (t0)
	addi		t0, t0, 16
	vle32.v		K10, (t0)
	addi		t0, t0, 16
	vle32.v		K11, (t0)
	addi		t0, t0, 16
	vle32.v		K12, (t0)
	addi		t0, t0, 16
	vle32.v		K13, (t0)
	addi		t0, t0, 16
	vle32.v		K14, (t0)
	addi		t0, t0, 16
	vle32.v		K15, (t0)

	// Setup mask for the vmerge to replace the first word (idx==0) in
	// message scheduling.  There are 4 words, so an 8-bit mask suffices.
	vsetivli	zero, 1, e8, m1, ta, ma
	vmv.v.i		MASK, 0x01

	// Load the state.  The state is stored as {a,b,c,d,e,f,g,h}, but we
	// need {f,e,b,a},{h,g,d,c}.  The dst vtype is e32m1 and the index
	// vtype is e8mf4.  We use index-load with the i8 indices {20, 16, 4, 0},
	// loaded using the 32-bit little endian value 0x00041014.
	li		t0, 0x00041014
	vsetivli	zero, 1, e32, m1, ta, ma
	vmv.v.x		INDICES, t0
	addi		STATEP_C, STATEP, 8
	vsetivli	zero, 4, e32, m1, ta, ma
	vluxei8.v	FEBA, (STATEP), INDICES
	vluxei8.v	HGDC, (STATEP_C), INDICES

.Lnext_block:
	addi		NUM_BLOCKS, NUM_BLOCKS, -1

	// Save the previous state, as it's needed later.
	vmv.v.v		PREV_FEBA, FEBA
	vmv.v.v		PREV_HGDC, HGDC

	// Load the next 512-bit message block and endian-swap each 32-bit word.
	vle32.v		W0, (DATA)
	vrev8.v		W0, W0
	addi		DATA, DATA, 16
	vle32.v		W1, (DATA)
	vrev8.v		W1, W1
	addi		DATA, DATA, 16
	vle32.v		W2, (DATA)
	vrev8.v		W2, W2
	addi		DATA, DATA, 16
	vle32.v		W3, (DATA)
	vrev8.v		W3, W3
	addi		DATA, DATA, 16

	// Do the 64 rounds of SHA-256.
	sha256_16rounds	0, K0, K1, K2, K3
	sha256_16rounds	0, K4, K5, K6, K7
	sha256_16rounds	0, K8, K9, K10, K11
	sha256_16rounds	1, K12, K13, K14, K15

	// Add the previous state.
	vadd.vv		FEBA, FEBA, PREV_FEBA
	vadd.vv		HGDC, HGDC, PREV_HGDC

	// Repeat if more blocks remain.
	bnez		NUM_BLOCKS, .Lnext_block

	// Store the new state and return.
	vsuxei8.v	FEBA, (STATEP), INDICES
	vsuxei8.v	HGDC, (STATEP_C), INDICES
	ret
SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)

.section ".rodata"
.p2align 2
.type K256, @object
K256:
	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.size K256, . - K256
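The indexed loads and stores in the function above permute the eight 32-bit state words into the {f,e,b,a} and {h,g,d,c} element orders that the vsha2c instructions expect. A standalone C sketch of the same byte-offset arithmetic (gather_feba_hgdc() is a hypothetical illustration, not part of this patch):

	#include <linux/types.h>

	/*
	 * Byte offsets {20, 16, 4, 0} applied to &state[0] select {f, e, b, a};
	 * the same offsets applied 8 bytes later (STATEP + 8) select
	 * {h, g, d, c}, since the state is laid out as a,b,c,d,e,f,g,h at
	 * byte offsets 0, 4, ..., 28.
	 */
	static void gather_feba_hgdc(const u32 state[8], u32 feba[4], u32 hgdc[4])
	{
		static const u8 idx[4] = { 20, 16, 4, 0 };
		const u8 *base = (const u8 *)state;
		int i;

		for (i = 0; i < 4; i++) {
			feba[i] = *(const u32 *)(base + idx[i]);	/* f, e, b, a */
			hgdc[i] = *(const u32 *)(base + 8 + idx[i]);	/* h, g, d, c */
		}
	}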
