+ ########################################################################
+ # Copyright (c) 2025 ZTE Corporation.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions
+ # are met:
+ # * Redistributions of source code must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.
+ # * Redistributions in binary form must reproduce the above copyright
+ # notice, this list of conditions and the following disclaimer in
+ # the documentation and/or other materials provided with the
+ # distribution.
+ # * Neither the name of ZTE Corporation nor the names of its
+ # contributors may be used to endorse or promote products derived
+ # from this software without specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ########################################################################
+ #if HAVE_ZBC && HAVE_ZVBC
+ #include "crc16_t10dif_vclmul.h"
+
+ .section .text
+ .align 2
+ .global crc16_t10dif_copy_vclmul
+
+ # Arguments:
+ #   a0: uint16_t crc  (initial CRC seed)
+ #   a1: uint8_t *dst  (destination buffer)
+ #   a2: uint8_t *src  (source buffer)
+ #   a3: size_t len    (length of the data in bytes)
+
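+ # crc16_t10dif_copy_vclmul copies len bytes from src to dst while computing the
+ # CRC16 T10-DIF checksum of the data.  Buffers shorter than 64 bytes take a
+ # byte-wise table-lookup path; longer buffers are folded 64 bytes at a time
+ # with vector carry-less multiplies (Zvbc), and any tail bytes are finished by
+ # the same table-lookup loop.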
+ crc16_t10dif_copy_vclmul:
+     # initialize seed for calculation in CRC32 format
+     slli a0, a0, 16
+     li t1, 64
+     bgeu a3, t1, .crc_fold
+
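+ # Byte-wise path: copy one byte at a time and update the CRC through the
+ # 256-entry lookup table .lanchor_crc_tab (expected to be provided by the
+ # included header).  a3 is converted into an end pointer (src + len) that
+ # serves as the loop bound.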
+ .crc_table_loop_pre:
+     beq a3, zero, .end
+     la a7, .lanchor_crc_tab
+     add a3, a3, a2
+
+ .crc_table_loop:
+     lbu a4, 0(a2)               # load the next source byte
+     sb a4, 0(a1)                # copy it to the destination
+     addi a2, a2, 1
+     addi a1, a1, 1
+     slliw a4, a4, 0x18          # align the byte with the top byte of the CRC
+     xor a4, a4, a0
+     srliw a5, a4, 0x18          # table index = (crc >> 24) ^ byte
+     slli a5, a5, 0x2            # scale by 4 (32-bit table entries)
+     add a5, a5, a7
+     lw a0, 0(a5)
+     slliw a4, a4, 0x8
+     xor a0, a0, a4              # crc = table[index] ^ (crc << 8)
+     bne a2, a3, .crc_table_loop
+
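+ # The 16-bit result sits in bits 31:16 of the CRC32-form working value.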
+ .end:
+     slli a0, a0, 32
+     srli a0, a0, 48
+     ret
+
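+ # Vector path (len >= 64): load and copy the first 64 bytes, byte-reverse each
+ # 16-byte block with vrgather so it can be treated as a 128-bit big-endian
+ # value, and XOR the seed into the most significant bits of the first block.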
+ .crc_fold:
+     vsetivli zero, 2, e64, m1, ta, ma
+     vl4re64.v v4, (a2)              # load 64 bytes into v4-v7
+     addi a2, a2, 64
+     addi a3, a3, -64
+
+     vs4r.v v4, (a1)                 # store the same 64 bytes to dst
+     addi a1, a1, 64
+
+     la t0, .shuffle_data_mask
+     vsetivli zero, 16, e8, m1, ta, ma
+     vle8.v v13, (t0)                # byte-reverse permutation mask
+     slli a0, a0, 32
+     vrgather.vv v0, v4, v13
+     vrgather.vv v1, v5, v13
+     vrgather.vv v2, v6, v13
+     vrgather.vv v3, v7, v13
+     vsetivli zero, 2, e64, m1, ta, ma
+
+     vmv.v.x v5, a0
+     vmv.s.x v4, zero
+     vslideup.vi v4, v5, 1           # v4 = { 0, crc << 48 }
+     la t2, .crc_loop_const
+     vle64.v v5, (t2)                # folding constants for the main loop
+     vxor.vv v0, v0, v4              # inject the seed into the first block
+     bltu a3, t1, crc_fold_finalization
+
+     li t0, 64
+
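+ # Main folding loop: each iteration copies the next 64 bytes and folds the four
+ # 128-bit accumulators (v0-v3) over them.  vclmul/vclmulh produce the low and
+ # high 64 bits of each 64x64 carry-less product against the constants in v5;
+ # the partial products are recombined with vslidedown/vxor/vslideup and XORed
+ # with the byte-reversed new data.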
+ crc_fold_loop:
+     vl4re64.v v8, (a2)              # load the next 64 bytes into v8-v11
+     addi a2, a2, 64
+     vs4r.v v8, (a1)                 # copy them to the destination
+     addi a1, a1, 64
+
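+     # fold v0: v15 = clmul(v0[0], v5[0]) ^ clmul(v0[1], v5[1]) as 128-bit values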
+     vclmul.vv v4, v0, v5
+     vclmulh.vv v0, v0, v5
+     vslidedown.vi v15, v4, 1
+     vslidedown.vi v14, v0, 1
+     vxor.vv v15, v15, v4
+     vxor.vv v14, v14, v0
+     vslideup.vi v15, v14, 1
+
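+     # fold v1 -> v16 (same pattern)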
+     vclmul.vv v4, v1, v5
+     vclmulh.vv v1, v1, v5
+     vslidedown.vi v16, v4, 1
+     vslidedown.vi v14, v1, 1
+     vxor.vv v16, v16, v4
+     vxor.vv v14, v14, v1
+     vslideup.vi v16, v14, 1
+
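+     # fold v2 -> v17 (same pattern)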
+     vclmul.vv v4, v2, v5
+     vclmulh.vv v2, v2, v5
+     vslidedown.vi v17, v4, 1
+     vslidedown.vi v14, v2, 1
+     vxor.vv v17, v17, v4
+     vxor.vv v14, v14, v2
+     vslideup.vi v17, v14, 1
+
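+     # fold v3 -> v18 (same pattern)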
+     vclmul.vv v4, v3, v5
+     vclmulh.vv v3, v3, v5
+     vslidedown.vi v18, v4, 1
+     vslidedown.vi v14, v3, 1
+     vxor.vv v18, v18, v4
+     vxor.vv v14, v14, v3
+     vslideup.vi v18, v14, 1
+
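+     # byte-reverse the freshly loaded data and XOR it into the folded results
+     # to form the accumulators for the next iteration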
+     vsetivli zero, 16, e8, m1, ta, ma
+     vrgather.vv v0, v8, v13
+     vrgather.vv v1, v9, v13
+     vrgather.vv v2, v10, v13
+     vrgather.vv v3, v11, v13
+     vsetivli zero, 2, e64, m1, ta, ma
+     vxor.vv v0, v0, v15
+     vxor.vv v1, v1, v16
+     vxor.vv v2, v2, v17
+     vxor.vv v3, v3, v18
+
+     addi a3, a3, -64
+     bge a3, t0, crc_fold_loop
+
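+ # Final reduction: fold the four 128-bit accumulators into one using the second
+ # pair of constants (at .crc_loop_const + 16), reduce 128 -> 64 bits with scalar
+ # carry-less multiplies, then finish with a Barrett reduction.  The constants
+ # const_low/const_high/const_quo/const_poly are expected to come from the
+ # included header.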
+ crc_fold_finalization:
+     # 512bit -> 128bit folding
+     addi t2, t2, 16                 # advance to the next pair of folding constants
+     vle64.v v5, (t2)
+     vclmul.vv v6, v0, v5
+     vclmulh.vv v7, v0, v5
+     vslidedown.vi v8, v6, 1
+     vslidedown.vi v9, v7, 1
+     vxor.vv v8, v8, v6
+     vxor.vv v9, v9, v7
+     vslideup.vi v8, v9, 1
+     vxor.vv v0, v8, v1              # fold v0 into v1
+
+     vclmul.vv v6, v0, v5
+     vclmulh.vv v7, v0, v5
+     vslidedown.vi v8, v6, 1
+     vslidedown.vi v9, v7, 1
+     vxor.vv v8, v8, v6
+     vxor.vv v9, v9, v7
+     vslideup.vi v8, v9, 1
+     vxor.vv v0, v8, v2              # then into v2
+
+     vclmul.vv v6, v0, v5
+     vclmulh.vv v7, v0, v5
+     vslidedown.vi v8, v6, 1
+     vslidedown.vi v9, v7, 1
+     vxor.vv v8, v8, v6
+     vxor.vv v9, v9, v7
+     vslideup.vi v8, v9, 1
+     vxor.vv v0, v8, v3              # then into v3
+
+ # 128bit -> 64bit folding
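+ # (move the two 64-bit halves of v0 into scalar registers and fold the upper
+ # half down onto the lower half with scalar carry-less multiplies)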
+     vmv.x.s t0, v0                  # t0 = low 64 bits of the folded value
+     vslidedown.vi v0, v0, 1
+     vmv.x.s t1, v0                  # t1 = high 64 bits
+     li t2, const_low
+     li t3, const_high
+     clmul a4, t1, t2
+     clmulh a5, t1, t2
+     slli a6, t0, 32
+     srli a7, t0, 32
+     xor a4, a4, a6
+     xor a5, a5, a7
+     clmul a5, a5, t3
+     xor a4, a4, a5
+
+ # Barrett reduction
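+ # (estimate the quotient with const_quo, multiply it back by the polynomial
+ # const_poly, and XOR to leave the remainder, i.e. the CRC32-form result, in a0)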
+     srli a5, a4, 32
+     li t2, const_quo
+     clmul a5, t2, a5
+     srli a5, a5, 32
+     li t3, const_poly
+     clmul a5, a5, t3
+     xor a0, a5, a4
+
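+ # Any remaining bytes (< 64) are handled by the byte-wise table loop; a0 already
+ # holds the folded CRC in CRC32 form.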
+ tail_processing:
+     beqz a3, .end
+     jal x0, .crc_table_loop_pre
+
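+ # 16-byte permutation used by vrgather to reverse the byte order of a 128-bit
+ # block.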
+ .shuffle_data_mask = . + 0
+ .type shuffle_data, %object
+ .size shuffle_data, 16
+ shuffle_data:
+     .byte 15, 14, 13, 12, 11, 10, 9, 8
+     .byte 7, 6, 5, 4, 3, 2, 1, 0
+
+ #endif