Skip to content

Commit 6093faa

Browse files
Chunyan Zhang authored and palmer-dabbelt committed
raid6: Add RISC-V SIMD syndrome and recovery calculations
The assembly is originally based on the ARM NEON and int.uc, but uses RISC-V vector instructions to implement the RAID6 syndrome and recovery calculations. The functions are tested on QEMU running with the option "-icount shift=0": raid6: rvvx1 gen() 1008 MB/s raid6: rvvx2 gen() 1395 MB/s raid6: rvvx4 gen() 1584 MB/s raid6: rvvx8 gen() 1694 MB/s raid6: int64x8 gen() 113 MB/s raid6: int64x4 gen() 116 MB/s raid6: int64x2 gen() 272 MB/s raid6: int64x1 gen() 229 MB/s raid6: using algorithm rvvx8 gen() 1694 MB/s raid6: .... xor() 1000 MB/s, rmw enabled raid6: using rvv recovery algorithm [Charlie: - Fixup vector options] Signed-off-by: Charlie Jenkins <[email protected]> Signed-off-by: Chunyan Zhang <[email protected]> Reviewed-by: Charlie Jenkins <[email protected]> Tested-by: Charlie Jenkins <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexandre Ghiti <[email protected]> Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent a569726 commit 6093faa

File tree

6 files changed

+1495
-0
lines changed

6 files changed

+1495
-0
lines changed

include/linux/raid/pq.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ extern const struct raid6_calls raid6_vpermxor4;
108108
extern const struct raid6_calls raid6_vpermxor8;
109109
extern const struct raid6_calls raid6_lsx;
110110
extern const struct raid6_calls raid6_lasx;
111+
extern const struct raid6_calls raid6_rvvx1;
112+
extern const struct raid6_calls raid6_rvvx2;
113+
extern const struct raid6_calls raid6_rvvx4;
114+
extern const struct raid6_calls raid6_rvvx8;
111115

112116
struct raid6_recov_calls {
113117
void (*data2)(int, size_t, int, int, void **);
@@ -125,6 +129,7 @@ extern const struct raid6_recov_calls raid6_recov_s390xc;
125129
extern const struct raid6_recov_calls raid6_recov_neon;
126130
extern const struct raid6_recov_calls raid6_recov_lsx;
127131
extern const struct raid6_recov_calls raid6_recov_lasx;
132+
extern const struct raid6_recov_calls raid6_recov_rvv;
128133

129134
extern const struct raid6_calls raid6_neonx1;
130135
extern const struct raid6_calls raid6_neonx2;

lib/raid6/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
1010
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
1111
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
1212
raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
13+
raid6_pq-$(CONFIG_RISCV_ISA_V) += rvv.o recov_rvv.o
1314

1415
hostprogs += mktables
1516

lib/raid6/algos.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ const struct raid6_calls * const raid6_algos[] = {
8080
#ifdef CONFIG_CPU_HAS_LSX
8181
&raid6_lsx,
8282
#endif
83+
#endif
84+
#ifdef CONFIG_RISCV_ISA_V
85+
&raid6_rvvx1,
86+
&raid6_rvvx2,
87+
&raid6_rvvx4,
88+
&raid6_rvvx8,
8389
#endif
8490
&raid6_intx8,
8591
&raid6_intx4,
@@ -115,6 +121,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
115121
#ifdef CONFIG_CPU_HAS_LSX
116122
&raid6_recov_lsx,
117123
#endif
124+
#endif
125+
#ifdef CONFIG_RISCV_ISA_V
126+
&raid6_recov_rvv,
118127
#endif
119128
&raid6_recov_intx1,
120129
NULL

lib/raid6/recov_rvv.c

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Copyright 2024 Institute of Software, CAS.
4+
* Author: Chunyan Zhang <[email protected]>
5+
*/
6+
7+
#include <asm/simd.h>
8+
#include <asm/vector.h>
9+
#include <crypto/internal/simd.h>
10+
#include <linux/raid/pq.h>
11+
12+
/*
 * Availability probe installed as the .valid callback of raid6_recov_rvv.
 * It simply forwards whatever has_vector() reports.
 */
static int rvv_has_vector(void)
{
	int vector_available = has_vector();

	return vector_available;
}
16+
17+
/*
 * Vector inner loop for recovering two failed data blocks.
 *
 * @bytes: number of bytes to process. The loop consumes 16 bytes per pass
 *         and stops only when the count reaches exactly zero, so this must
 *         be a multiple of 16.
 * @p:     P block (read only)
 * @q:     Q block (read only)
 * @dp:    holds delta P on entry; overwritten with db ^ px
 * @dq:    holds delta Q on entry; overwritten with db
 * @pbmul: multiplier table for px; 16 bytes are loaded from pbmul (indexed
 *         by the low nibble) and 16 from pbmul + 16 (indexed by the high
 *         nibble)
 * @qmul:  multiplier table for *q ^ *dq, same two-halves layout as @pbmul
 *
 * The wrapper in this file calls this between kernel_vector_begin() and
 * kernel_vector_end().
 *
 * NOTE(review): vl is requested as 16 with e8/m1, so each vector access is
 * 16 bytes only if the hardware VLEN is at least 128 bits - confirm this is
 * guaranteed for every platform where the algorithm is selected.
 */
static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp,
				    u8 *dq, const u8 *pbmul,
				    const u8 *qmul)
{
	/* Configure 8-bit elements, LMUL=1, requesting vl = 16. */
	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli x0, %[avl], e8, m1, ta, ma\n"
		      ".option pop\n"
		      : :
		      [avl]"r"(16)
	);

	/*
	 * while ( bytes-- ) {
	 *	uint8_t px, qx, db;
	 *
	 *	px = *p ^ *dp;
	 *	qx = qmul[*q ^ *dq];
	 *	*dq++ = db = pbmul[px] ^ qx;
	 *	*dp++ = db ^ px;
	 *	p++; q++;
	 * }
	 */
	while (bytes) {
		/*
		 * v0:px, v1:dp,
		 * v2:qx, v3:dq,
		 * v4:vx, v5:vy,
		 * v6:qm0, v7:qm1,
		 * v8:pm0, v9:pm1,
		 * v14:p/qm[vx], v15:p/qm[vy]
		 *
		 * Every buffer is reached through a pointer passed as a plain
		 * register input, so the compiler cannot see that the asm
		 * reads p/q/dp/dq and the tables, or that it stores to dq and
		 * dp.  The "memory" clobber makes those accesses visible and
		 * prevents the compiler from caching or reordering memory
		 * operations across the asm.
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[px])\n"
			      "vle8.v v1, (%[dp])\n"
			      "vxor.vv v0, v0, v1\n"
			      "vle8.v v2, (%[qx])\n"
			      "vle8.v v3, (%[dq])\n"
			      "vxor.vv v4, v2, v3\n"
			      "vsrl.vi v5, v4, 4\n"
			      "vand.vi v4, v4, 0xf\n"
			      "vle8.v v6, (%[qm0])\n"
			      "vle8.v v7, (%[qm1])\n"
			      "vrgather.vv v14, v6, v4\n" /* v14 = qm[vx] */
			      "vrgather.vv v15, v7, v5\n" /* v15 = qm[vy] */
			      "vxor.vv v2, v14, v15\n" /* v2 = qmul[*q ^ *dq] */

			      "vsrl.vi v5, v0, 4\n"
			      "vand.vi v4, v0, 0xf\n"
			      "vle8.v v8, (%[pm0])\n"
			      "vle8.v v9, (%[pm1])\n"
			      "vrgather.vv v14, v8, v4\n" /* v14 = pm[vx] */
			      "vrgather.vv v15, v9, v5\n" /* v15 = pm[vy] */
			      "vxor.vv v4, v14, v15\n" /* v4 = pbmul[px] */
			      "vxor.vv v3, v4, v2\n" /* v3 = db = pbmul[px] ^ qx */
			      "vxor.vv v1, v3, v0\n" /* v1 = db ^ px; */
			      "vse8.v v3, (%[dq])\n"
			      "vse8.v v1, (%[dp])\n"
			      ".option pop\n"
			      : :
			      [px]"r"(p),
			      [dp]"r"(dp),
			      [qx]"r"(q),
			      [dq]"r"(dq),
			      [qm0]"r"(qmul),
			      [qm1]"r"(qmul + 16),
			      [pm0]"r"(pbmul),
			      [pm1]"r"(pbmul + 16)
			      : "memory");

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
	}
}
95+
96+
/*
 * Vector inner loop for recovering one failed data block plus P.
 *
 * @bytes: number of bytes to process. The loop consumes 16 bytes per pass
 *         and stops only when the count reaches exactly zero, so this must
 *         be a multiple of 16.
 * @p:     P block; each chunk is XORed in place with the recovered data
 * @q:     Q block (read only)
 * @dq:    holds delta Q on entry; overwritten with qmul[*q ^ *dq]
 * @qmul:  multiplier table; 16 bytes are loaded from qmul (indexed by the
 *         low nibble) and 16 from qmul + 16 (indexed by the high nibble)
 *
 * The wrapper in this file calls this between kernel_vector_begin() and
 * kernel_vector_end().
 *
 * NOTE(review): vl is requested as 16 with e8/m1, so each vector access is
 * 16 bytes only if the hardware VLEN is at least 128 bits - confirm this is
 * guaranteed for every platform where the algorithm is selected.
 */
static void __raid6_datap_recov_rvv(int bytes, u8 *p, u8 *q,
				    u8 *dq, const u8 *qmul)
{
	/* Configure 8-bit elements, LMUL=1, requesting vl = 16. */
	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli x0, %[avl], e8, m1, ta, ma\n"
		      ".option pop\n"
		      : :
		      [avl]"r"(16)
	);

	/*
	 * while (bytes--) {
	 *	*p++ ^= *dq = qmul[*q ^ *dq];
	 *	q++; dq++;
	 * }
	 */
	while (bytes) {
		/*
		 * v0:vx, v1:vy,
		 * v2:dq, v3:p,
		 * v4:qm0, v5:qm1,
		 * v10:m[vx], v11:m[vy]
		 *
		 * The asm loads from q, dq, p and the table and stores to dq
		 * and p, all through pointers passed as plain register
		 * inputs.  The "memory" clobber tells the compiler about
		 * those accesses so it does not cache or reorder memory
		 * operations across the asm.
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[vx])\n"
			      "vle8.v v2, (%[dq])\n"
			      "vxor.vv v0, v0, v2\n"
			      "vsrl.vi v1, v0, 4\n"
			      "vand.vi v0, v0, 0xf\n"
			      "vle8.v v4, (%[qm0])\n"
			      "vle8.v v5, (%[qm1])\n"
			      "vrgather.vv v10, v4, v0\n"
			      "vrgather.vv v11, v5, v1\n"
			      "vxor.vv v0, v10, v11\n" /* v0 = qmul[*q ^ *dq] */
			      "vle8.v v1, (%[vy])\n"
			      "vxor.vv v1, v0, v1\n" /* v1 = *p ^ v0 */
			      "vse8.v v0, (%[dq])\n"
			      "vse8.v v1, (%[vy])\n"
			      ".option pop\n"
			      : :
			      [vx]"r"(q),
			      [vy]"r"(p),
			      [dq]"r"(dq),
			      [qm0]"r"(qmul),
			      [qm1]"r"(qmul + 16)
			      : "memory");

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
	}
}
151+
152+
static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila,
153+
int failb, void **ptrs)
154+
{
155+
u8 *p, *q, *dp, *dq;
156+
const u8 *pbmul; /* P multiplier table for B data */
157+
const u8 *qmul; /* Q multiplier table (for both) */
158+
159+
p = (u8 *)ptrs[disks - 2];
160+
q = (u8 *)ptrs[disks - 1];
161+
162+
/*
163+
* Compute syndrome with zero for the missing data pages
164+
* Use the dead data pages as temporary storage for
165+
* delta p and delta q
166+
*/
167+
dp = (u8 *)ptrs[faila];
168+
ptrs[faila] = (void *)raid6_empty_zero_page;
169+
ptrs[disks - 2] = dp;
170+
dq = (u8 *)ptrs[failb];
171+
ptrs[failb] = (void *)raid6_empty_zero_page;
172+
ptrs[disks - 1] = dq;
173+
174+
raid6_call.gen_syndrome(disks, bytes, ptrs);
175+
176+
/* Restore pointer table */
177+
ptrs[faila] = dp;
178+
ptrs[failb] = dq;
179+
ptrs[disks - 2] = p;
180+
ptrs[disks - 1] = q;
181+
182+
/* Now, pick the proper data tables */
183+
pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
184+
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
185+
raid6_gfexp[failb]]];
186+
187+
kernel_vector_begin();
188+
__raid6_2data_recov_rvv(bytes, p, q, dp, dq, pbmul, qmul);
189+
kernel_vector_end();
190+
}
191+
192+
static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila,
193+
void **ptrs)
194+
{
195+
u8 *p, *q, *dq;
196+
const u8 *qmul; /* Q multiplier table */
197+
198+
p = (u8 *)ptrs[disks - 2];
199+
q = (u8 *)ptrs[disks - 1];
200+
201+
/*
202+
* Compute syndrome with zero for the missing data page
203+
* Use the dead data page as temporary storage for delta q
204+
*/
205+
dq = (u8 *)ptrs[faila];
206+
ptrs[faila] = (void *)raid6_empty_zero_page;
207+
ptrs[disks - 1] = dq;
208+
209+
raid6_call.gen_syndrome(disks, bytes, ptrs);
210+
211+
/* Restore pointer table */
212+
ptrs[faila] = dq;
213+
ptrs[disks - 1] = q;
214+
215+
/* Now, pick the proper data tables */
216+
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
217+
218+
kernel_vector_begin();
219+
__raid6_datap_recov_rvv(bytes, p, q, dq, qmul);
220+
kernel_vector_end();
221+
}
222+
223+
const struct raid6_recov_calls raid6_recov_rvv = {
224+
.data2 = raid6_2data_recov_rvv,
225+
.datap = raid6_datap_recov_rvv,
226+
.valid = rvv_has_vector,
227+
.name = "rvv",
228+
.priority = 1,
229+
};

0 commit comments

Comments
 (0)