/**********************************************************************
  Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of ISCAS nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#if HAVE_RVV
//---------------------------------------------------------------------
// pq_gen_rvv — generate RAID-6 style P (XOR) and Q (GF(2^8)) parity
// with the RISC-V Vector extension.
//
// C-equivalent signature (presumably matching ISA-L's pq_gen — confirm
// against the header this file is built with):
//     int pq_gen_rvv(int vects, int len, void **array);
//
// In:   a0 = vects  (array[0..vects-3] are data sources,
//                    array[vects-2] = P buffer, array[vects-1] = Q buffer)
//       a1 = len    (bytes per buffer; consumed in 8-byte units)
//       a2 = array  (table of vects pointers)
// Out:  a0 = 0 on success, 1 when vects < 4
// Clobbers: a3-a7, t0-t6, v0-v27, vl/vtype, flags n/a (RISC-V)
//
// Q accumulates q = s ^ gf_mul2(q) from the last data source down to
// the first. gf_mul2 doubles every byte modulo polynomial 0x11d:
//     mul2(q) = ((q << 1) & 0xfe..fe) ^ (0x1d per byte whose bit7 was set)
// The "0x1d per overflowing byte" term is built with the SWAR trick
// (m << 1) - (m >> 7), which turns each 0x80 byte of m = q & 0x80..80
// into 0xff, then masks with 0x1d1d..1d.
//
// NOTE(review): when len < 8, blocks == 0 and the function returns 0
// without writing P/Q — confirm callers guarantee len >= 8 (and that a
// multiple of 8 is expected, since len % 8 trailing bytes are ignored).
//---------------------------------------------------------------------
.option arch, +v
.global pq_gen_rvv
.type pq_gen_rvv, %function
pq_gen_rvv:
    srli a1, a1, 3              // blocks = len / 8 (64-bit units)
    beqz a1, ret0               // blocks == 0: nothing to do, succeed
    addi a6, a0, -3             // outer trips = vects - 3
                                //   (data sources left after init pass)
    blez a6, ret1               // vects < 4: too few buffers, fail

    slli t0, a0, 3              // t0 = vects * 8
    add t0, a2, t0              // t0 = &array[vects] (one past the end)
    li t1, 0x8080808080808080   // per-byte bit7 mask
    li t2, 0xfefefefefefefefe   // per-byte ~bit0 (drops cross-byte carries)
    li t3, 0x1d1d1d1d1d1d1d1d   // GF(2^8) reduction byte (poly 0x11d)
    ld a3, -24(t0)              // a3 = array[vects-3], last data source
    ld a4, -16(t0)              // a4 = P
    ld a5, -8(t0)               // a5 = Q
    mv t6, a1                   // t6 = working block count
    mv t5, a4                   // t5 = saved P base (rewound each pass)
    mv a7, a5                   // a7 = saved Q base (rewound each pass)

// First pass: P = Q = array[vects-3] (plain copy of the last source).
init_pq:
    vsetvli t4, t6, e64, m4, ta, ma
    vle64.v v0, (a3)            // s = *src
    vse64.v v0, (a4)            // init P = s
    vse64.v v0, (a5)            // init Q = s
    sub t6, t6, t4              // blocks -= vl
    slli t4, t4, 3              // stride = vl * 8 bytes
    add a3, a3, t4
    add a4, a4, t4
    add a5, a5, t4
    bnez t6, init_pq

// Outer loop: fold array[vects-4] .. array[0] into P and Q, one
// source per pass; t0 retreats 8 bytes per pass to index the table.
outer_j:
    mv a4, t5                   // rewind P to buffer start
    mv a5, a7                   // rewind Q to buffer start
    mv t6, a1                   // reset block count
    ld a0, -32(t0)              // a0 = current data source

inner_block:
    vsetvli t4, t6, e64, m4, ta, ma
    vle64.v v8, (a0)            // v8 = s
    vle64.v v0, (a4)            // v0 = p
    vle64.v v4, (a5)            // v4 = q
    vxor.vv v0, v0, v8          // p ^= s
    vand.vx v20, v4, t1         // m = q & bit7 (bytes that overflow on <<1)
    vsll.vi v24, v4, 1          // q << 1
    vand.vx v24, v24, t2        // (q << 1) & ~bit0: kill cross-byte spill
    vsrl.vi v16, v20, 7         // m >> 7
    vsll.vi v20, v20, 1         // m << 1
    vsub.vv v20, v20, v16       // (m << 1) - (m >> 7): 0xff per set byte
    vand.vx v20, v20, t3        // 0x1d per byte whose bit7 was set
    vxor.vv v4, v24, v20        // q = gf_mul2(q)
    vxor.vv v4, v4, v8          // q ^= s
    vse64.v v0, (a4)            // store p
    vse64.v v4, (a5)            // store q
    sub t6, t6, t4              // blocks -= vl
    slli t4, t4, 3              // stride = vl * 8 bytes
    add a4, a4, t4              // p += stride
    add a5, a5, t4              // q += stride
    add a0, a0, t4              // s += stride
    bnez t6, inner_block

    addi a6, a6, -1             // one fewer source to fold
    addi t0, t0, -8             // step back to the previous array slot
    bnez a6, outer_j

ret0:
    li a0, 0                    // success
    ret

ret1:
    li a0, 1                    // failure: vects < 4
    ret

#endif
0 commit comments