|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
| 2 | +/* |
| 3 | + * Copyright 2024 Institute of Software, CAS. |
| 4 | + * Author: Chunyan Zhang <[email protected]> |
| 5 | + */ |
| 6 | + |
| 7 | +#include <asm/simd.h> |
| 8 | +#include <asm/vector.h> |
| 9 | +#include <crypto/internal/simd.h> |
| 10 | +#include <linux/raid/pq.h> |
| 11 | + |
/*
 * Installed as the .valid callback of raid6_recov_rvv; hands back whatever
 * has_vector() reports, converted to int.
 */
static int rvv_has_vector(void)
{
	const int vector_present = has_vector();

	return vector_present;
}
| 16 | + |
| 17 | +static void __raid6_2data_recov_rvv(int bytes, u8 *p, u8 *q, u8 *dp, |
| 18 | + u8 *dq, const u8 *pbmul, |
| 19 | + const u8 *qmul) |
| 20 | +{ |
| 21 | + asm volatile (".option push\n" |
| 22 | + ".option arch,+v\n" |
| 23 | + "vsetvli x0, %[avl], e8, m1, ta, ma\n" |
| 24 | + ".option pop\n" |
| 25 | + : : |
| 26 | + [avl]"r"(16) |
| 27 | + ); |
| 28 | + |
| 29 | + /* |
| 30 | + * while ( bytes-- ) { |
| 31 | + * uint8_t px, qx, db; |
| 32 | + * |
| 33 | + * px = *p ^ *dp; |
| 34 | + * qx = qmul[*q ^ *dq]; |
| 35 | + * *dq++ = db = pbmul[px] ^ qx; |
| 36 | + * *dp++ = db ^ px; |
| 37 | + * p++; q++; |
| 38 | + * } |
| 39 | + */ |
| 40 | + while (bytes) { |
| 41 | + /* |
| 42 | + * v0:px, v1:dp, |
| 43 | + * v2:qx, v3:dq, |
| 44 | + * v4:vx, v5:vy, |
| 45 | + * v6:qm0, v7:qm1, |
| 46 | + * v8:pm0, v9:pm1, |
| 47 | + * v14:p/qm[vx], v15:p/qm[vy] |
| 48 | + */ |
| 49 | + asm volatile (".option push\n" |
| 50 | + ".option arch,+v\n" |
| 51 | + "vle8.v v0, (%[px])\n" |
| 52 | + "vle8.v v1, (%[dp])\n" |
| 53 | + "vxor.vv v0, v0, v1\n" |
| 54 | + "vle8.v v2, (%[qx])\n" |
| 55 | + "vle8.v v3, (%[dq])\n" |
| 56 | + "vxor.vv v4, v2, v3\n" |
| 57 | + "vsrl.vi v5, v4, 4\n" |
| 58 | + "vand.vi v4, v4, 0xf\n" |
| 59 | + "vle8.v v6, (%[qm0])\n" |
| 60 | + "vle8.v v7, (%[qm1])\n" |
| 61 | + "vrgather.vv v14, v6, v4\n" /* v14 = qm[vx] */ |
| 62 | + "vrgather.vv v15, v7, v5\n" /* v15 = qm[vy] */ |
| 63 | + "vxor.vv v2, v14, v15\n" /* v2 = qmul[*q ^ *dq] */ |
| 64 | + |
| 65 | + "vsrl.vi v5, v0, 4\n" |
| 66 | + "vand.vi v4, v0, 0xf\n" |
| 67 | + "vle8.v v8, (%[pm0])\n" |
| 68 | + "vle8.v v9, (%[pm1])\n" |
| 69 | + "vrgather.vv v14, v8, v4\n" /* v14 = pm[vx] */ |
| 70 | + "vrgather.vv v15, v9, v5\n" /* v15 = pm[vy] */ |
| 71 | + "vxor.vv v4, v14, v15\n" /* v4 = pbmul[px] */ |
| 72 | + "vxor.vv v3, v4, v2\n" /* v3 = db = pbmul[px] ^ qx */ |
| 73 | + "vxor.vv v1, v3, v0\n" /* v1 = db ^ px; */ |
| 74 | + "vse8.v v3, (%[dq])\n" |
| 75 | + "vse8.v v1, (%[dp])\n" |
| 76 | + ".option pop\n" |
| 77 | + : : |
| 78 | + [px]"r"(p), |
| 79 | + [dp]"r"(dp), |
| 80 | + [qx]"r"(q), |
| 81 | + [dq]"r"(dq), |
| 82 | + [qm0]"r"(qmul), |
| 83 | + [qm1]"r"(qmul + 16), |
| 84 | + [pm0]"r"(pbmul), |
| 85 | + [pm1]"r"(pbmul + 16) |
| 86 | + :); |
| 87 | + |
| 88 | + bytes -= 16; |
| 89 | + p += 16; |
| 90 | + q += 16; |
| 91 | + dp += 16; |
| 92 | + dq += 16; |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +static void __raid6_datap_recov_rvv(int bytes, u8 *p, u8 *q, |
| 97 | + u8 *dq, const u8 *qmul) |
| 98 | +{ |
| 99 | + asm volatile (".option push\n" |
| 100 | + ".option arch,+v\n" |
| 101 | + "vsetvli x0, %[avl], e8, m1, ta, ma\n" |
| 102 | + ".option pop\n" |
| 103 | + : : |
| 104 | + [avl]"r"(16) |
| 105 | + ); |
| 106 | + |
| 107 | + /* |
| 108 | + * while (bytes--) { |
| 109 | + * *p++ ^= *dq = qmul[*q ^ *dq]; |
| 110 | + * q++; dq++; |
| 111 | + * } |
| 112 | + */ |
| 113 | + while (bytes) { |
| 114 | + /* |
| 115 | + * v0:vx, v1:vy, |
| 116 | + * v2:dq, v3:p, |
| 117 | + * v4:qm0, v5:qm1, |
| 118 | + * v10:m[vx], v11:m[vy] |
| 119 | + */ |
| 120 | + asm volatile (".option push\n" |
| 121 | + ".option arch,+v\n" |
| 122 | + "vle8.v v0, (%[vx])\n" |
| 123 | + "vle8.v v2, (%[dq])\n" |
| 124 | + "vxor.vv v0, v0, v2\n" |
| 125 | + "vsrl.vi v1, v0, 4\n" |
| 126 | + "vand.vi v0, v0, 0xf\n" |
| 127 | + "vle8.v v4, (%[qm0])\n" |
| 128 | + "vle8.v v5, (%[qm1])\n" |
| 129 | + "vrgather.vv v10, v4, v0\n" |
| 130 | + "vrgather.vv v11, v5, v1\n" |
| 131 | + "vxor.vv v0, v10, v11\n" |
| 132 | + "vle8.v v1, (%[vy])\n" |
| 133 | + "vxor.vv v1, v0, v1\n" |
| 134 | + "vse8.v v0, (%[dq])\n" |
| 135 | + "vse8.v v1, (%[vy])\n" |
| 136 | + ".option pop\n" |
| 137 | + : : |
| 138 | + [vx]"r"(q), |
| 139 | + [vy]"r"(p), |
| 140 | + [dq]"r"(dq), |
| 141 | + [qm0]"r"(qmul), |
| 142 | + [qm1]"r"(qmul + 16) |
| 143 | + :); |
| 144 | + |
| 145 | + bytes -= 16; |
| 146 | + p += 16; |
| 147 | + q += 16; |
| 148 | + dq += 16; |
| 149 | + } |
| 150 | +} |
| 151 | + |
| 152 | +static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila, |
| 153 | + int failb, void **ptrs) |
| 154 | +{ |
| 155 | + u8 *p, *q, *dp, *dq; |
| 156 | + const u8 *pbmul; /* P multiplier table for B data */ |
| 157 | + const u8 *qmul; /* Q multiplier table (for both) */ |
| 158 | + |
| 159 | + p = (u8 *)ptrs[disks - 2]; |
| 160 | + q = (u8 *)ptrs[disks - 1]; |
| 161 | + |
| 162 | + /* |
| 163 | + * Compute syndrome with zero for the missing data pages |
| 164 | + * Use the dead data pages as temporary storage for |
| 165 | + * delta p and delta q |
| 166 | + */ |
| 167 | + dp = (u8 *)ptrs[faila]; |
| 168 | + ptrs[faila] = (void *)raid6_empty_zero_page; |
| 169 | + ptrs[disks - 2] = dp; |
| 170 | + dq = (u8 *)ptrs[failb]; |
| 171 | + ptrs[failb] = (void *)raid6_empty_zero_page; |
| 172 | + ptrs[disks - 1] = dq; |
| 173 | + |
| 174 | + raid6_call.gen_syndrome(disks, bytes, ptrs); |
| 175 | + |
| 176 | + /* Restore pointer table */ |
| 177 | + ptrs[faila] = dp; |
| 178 | + ptrs[failb] = dq; |
| 179 | + ptrs[disks - 2] = p; |
| 180 | + ptrs[disks - 1] = q; |
| 181 | + |
| 182 | + /* Now, pick the proper data tables */ |
| 183 | + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; |
| 184 | + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ |
| 185 | + raid6_gfexp[failb]]]; |
| 186 | + |
| 187 | + kernel_vector_begin(); |
| 188 | + __raid6_2data_recov_rvv(bytes, p, q, dp, dq, pbmul, qmul); |
| 189 | + kernel_vector_end(); |
| 190 | +} |
| 191 | + |
| 192 | +static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila, |
| 193 | + void **ptrs) |
| 194 | +{ |
| 195 | + u8 *p, *q, *dq; |
| 196 | + const u8 *qmul; /* Q multiplier table */ |
| 197 | + |
| 198 | + p = (u8 *)ptrs[disks - 2]; |
| 199 | + q = (u8 *)ptrs[disks - 1]; |
| 200 | + |
| 201 | + /* |
| 202 | + * Compute syndrome with zero for the missing data page |
| 203 | + * Use the dead data page as temporary storage for delta q |
| 204 | + */ |
| 205 | + dq = (u8 *)ptrs[faila]; |
| 206 | + ptrs[faila] = (void *)raid6_empty_zero_page; |
| 207 | + ptrs[disks - 1] = dq; |
| 208 | + |
| 209 | + raid6_call.gen_syndrome(disks, bytes, ptrs); |
| 210 | + |
| 211 | + /* Restore pointer table */ |
| 212 | + ptrs[faila] = dq; |
| 213 | + ptrs[disks - 1] = q; |
| 214 | + |
| 215 | + /* Now, pick the proper data tables */ |
| 216 | + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; |
| 217 | + |
| 218 | + kernel_vector_begin(); |
| 219 | + __raid6_datap_recov_rvv(bytes, p, q, dq, qmul); |
| 220 | + kernel_vector_end(); |
| 221 | +} |
| 222 | + |
| 223 | +const struct raid6_recov_calls raid6_recov_rvv = { |
| 224 | + .data2 = raid6_2data_recov_rvv, |
| 225 | + .datap = raid6_datap_recov_rvv, |
| 226 | + .valid = rvv_has_vector, |
| 227 | + .name = "rvv", |
| 228 | + .priority = 1, |
| 229 | +}; |
0 commit comments