7
7
8
8
/*
 * How to handle various characters in refnames.
 *
 * This table is indexed by the byte value (0..255) and is used by both
 * the SIMD and the non-SIMD code.  Some dispositions are only useful
 * to the SIMD code; the non-SIMD code treats them like the disposition
 * named in parentheses.
 *
 * 0: An acceptable character for refs
 * 1: @, look for a following { to reject @{ in refs (non-SIMD: same as 0)
 * 2: \0, end of component and of the string
 * 3: /, end of component (non-SIMD: same as 2)
 * 4: ., look for a preceding . to reject .. in refs
 * 5: {, look for a preceding @ to reject @{ in refs
 * 6: *, a bad character except once, as a wildcard (non-SIMD: same as 7)
 * 7: A bad character other than * (see check_refname_component below):
 *    ASCII control characters, SP, ":", "?", "[", "\", "^", "~" and DEL
 *
 * Only the first 128 entries are listed; bytes 128..255 are
 * zero-initialized and therefore acceptable.
 */
static unsigned char refname_disposition[256] = {
	/* 0x00 */ 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	/* 0x10 */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	/* 0x20 */ 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 4, 3,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7,
	/* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 7, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 7
};
26
32
27
33
/*
@@ -33,8 +39,9 @@ static unsigned char refname_disposition[256] = {
33
39
* - any path component of it begins with ".", or
34
40
* - it has double dots "..", or
35
41
* - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
36
- * - it ends with a "/".
37
- * - it ends with ".lock"
42
+ * - it has pattern-matching notation "*", "?", "[", anywhere, or
43
+ * - it ends with a "/", or
44
+ * - it ends with ".lock", or
38
45
* - it contains a "\" (backslash)
39
46
*/
40
47
static int check_refname_component (const char * refname , int flags )
@@ -46,17 +53,19 @@ static int check_refname_component(const char *refname, int flags)
46
53
int ch = * cp & 255 ;
47
54
unsigned char disp = refname_disposition [ch ];
48
55
switch (disp ) {
49
- case 1 :
56
+ case 2 : /* fall-through */
57
+ case 3 :
50
58
goto out ;
51
- case 2 :
59
+ case 4 :
52
60
if (last == '.' )
53
61
return -1 ; /* Refname contains "..". */
54
62
break ;
55
- case 3 :
63
+ case 5 :
56
64
if (last == '@' )
57
65
return -1 ; /* Refname contains "@{". */
58
66
break ;
59
- case 4 :
67
+ case 6 : /* fall-through */
68
+ case 7 :
60
69
return -1 ;
61
70
}
62
71
last = ch ;
@@ -79,7 +88,7 @@ static int check_refname_component(const char *refname, int flags)
79
88
return cp - refname ;
80
89
}
81
90
82
- int check_refname_format (const char * refname , int flags )
91
+ static int check_refname_format_bytewise (const char * refname , int flags )
83
92
{
84
93
int component_len , component_count = 0 ;
85
94
@@ -115,6 +124,195 @@ int check_refname_format(const char *refname, int flags)
115
124
return 0 ;
116
125
}
117
126
127
+ #if defined(__GNUC__ ) && defined(__x86_64__ )
128
+ #define SSE_VECTOR_BYTES 16
129
+
130
+ /* Vectorized version of check_refname_format. */
131
+ int check_refname_format (const char * refname , int flags )
132
+ {
133
+ const char * cp = refname ;
134
+
135
+ const __m128i dot = _mm_set1_epi8 ('.' );
136
+ const __m128i at = _mm_set1_epi8 ('@' );
137
+ const __m128i curly = _mm_set1_epi8 ('{' );
138
+ const __m128i slash = _mm_set1_epi8 ('/' );
139
+ const __m128i zero = _mm_set1_epi8 ('\000' );
140
+ const __m128i el = _mm_set1_epi8 ('l' );
141
+
142
+ /* below '*', all characters are forbidden or rare */
143
+ const __m128i star_ub = _mm_set1_epi8 ('*' + 1 );
144
+
145
+ const __m128i colon = _mm_set1_epi8 (':' );
146
+ const __m128i question = _mm_set1_epi8 ('?' );
147
+
148
+ /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
149
+ const __m128i bracket_lb = _mm_set1_epi8 ('[' - 1 );
150
+ const __m128i caret_ub = _mm_set1_epi8 ('^' + 1 );
151
+
152
+ /* '~' and above are forbidden */
153
+ const __m128i tilde_lb = _mm_set1_epi8 ('~' - 1 );
154
+
155
+ int component_count = 0 ;
156
+
157
+ if (refname [0 ] == 0 || refname [0 ] == '/' ) {
158
+ /* entirely empty ref or initial ref component */
159
+ return -1 ;
160
+ }
161
+
162
+ /*
163
+ * Initial ref component of '.'; below we look for /. so we'll
164
+ * miss this.
165
+ */
166
+ if (refname [0 ] == '.' ) {
167
+ if (refname [1 ] == '/' || refname [1 ] == '\0' )
168
+ return -1 ;
169
+ if (!(flags & REFNAME_DOT_COMPONENT ))
170
+ return -1 ;
171
+ }
172
+ while (1 ) {
173
+ __m128i tmp , tmp1 , result ;
174
+ uint64_t mask ;
175
+
176
+ if ((uintptr_t ) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1 )
177
+ /*
178
+ * End-of-page; fall back to slow method for
179
+ * this entire ref.
180
+ */
181
+ return check_refname_format_bytewise (refname , flags );
182
+
183
+ tmp = _mm_loadu_si128 ((__m128i * )cp );
184
+ tmp1 = _mm_loadu_si128 ((__m128i * )(cp + 1 ));
185
+
186
+ /*
187
+ * This range (note the lt) contains some
188
+ * permissible-but-rare characters (including all
189
+ * characters >= 128), which we handle later. It also
190
+ * includes \000.
191
+ */
192
+ result = _mm_cmplt_epi8 (tmp , star_ub );
193
+
194
+ result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , question ));
195
+ result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , colon ));
196
+
197
+ /* This range contains the permissible ] as bycatch */
198
+ result = _mm_or_si128 (result , _mm_and_si128 (
199
+ _mm_cmpgt_epi8 (tmp , bracket_lb ),
200
+ _mm_cmplt_epi8 (tmp , caret_ub )));
201
+
202
+ result = _mm_or_si128 (result , _mm_cmpgt_epi8 (tmp , tilde_lb ));
203
+
204
+ /* .. */
205
+ result = _mm_or_si128 (result , _mm_and_si128 (
206
+ _mm_cmpeq_epi8 (tmp , dot ),
207
+ _mm_cmpeq_epi8 (tmp1 , dot )));
208
+ /* @{ */
209
+ result = _mm_or_si128 (result , _mm_and_si128 (
210
+ _mm_cmpeq_epi8 (tmp , at ),
211
+ _mm_cmpeq_epi8 (tmp1 , curly )));
212
+ /* // */
213
+ result = _mm_or_si128 (result , _mm_and_si128 (
214
+ _mm_cmpeq_epi8 (tmp , slash ),
215
+ _mm_cmpeq_epi8 (tmp1 , slash )));
216
+ /* trailing / */
217
+ result = _mm_or_si128 (result , _mm_and_si128 (
218
+ _mm_cmpeq_epi8 (tmp , slash ),
219
+ _mm_cmpeq_epi8 (tmp1 , zero )));
220
+ /* .l, beginning of .lock */
221
+ result = _mm_or_si128 (result , _mm_and_si128 (
222
+ _mm_cmpeq_epi8 (tmp , dot ),
223
+ _mm_cmpeq_epi8 (tmp1 , el )));
224
+ /*
225
+ * Even though /. is not necessarily an error, we flag
226
+ * it anyway. If we find it, we'll check if it's valid
227
+ * and if so we'll advance just past it.
228
+ */
229
+ result = _mm_or_si128 (result , _mm_and_si128 (
230
+ _mm_cmpeq_epi8 (tmp , slash ),
231
+ _mm_cmpeq_epi8 (tmp1 , dot )));
232
+
233
+ mask = _mm_movemask_epi8 (result );
234
+ if (mask ) {
235
+ /*
236
+ * We've found either end-of-string, or some
237
+ * probably-bad character or substring.
238
+ */
239
+ int i = __builtin_ctz (mask );
240
+ switch (refname_disposition [cp [i ] & 255 ]) {
241
+ case 0 : /* fall-through */
242
+ case 5 :
243
+ /*
244
+ * bycatch: a good character that's in
245
+ * one of the ranges of mostly-forbidden
246
+ * characters
247
+ */
248
+ cp += i + 1 ;
249
+ break ;
250
+ case 1 :
251
+ if (cp [i + 1 ] == '{' )
252
+ return -1 ;
253
+ cp += i + 1 ;
254
+ break ;
255
+ case 2 :
256
+ if (!(flags & REFNAME_ALLOW_ONELEVEL )
257
+ && !component_count && !strchr (refname , '/' ))
258
+ /* Refname has only one component. */
259
+ return -1 ;
260
+ return 0 ;
261
+ case 3 :
262
+ component_count ++ ;
263
+ /*
264
+ * Even if leading dots are allowed, don't
265
+ * allow "." as a component (".." is
266
+ * prevented by case 4 below).
267
+ */
268
+ if (cp [i + 1 ] == '.' ) {
269
+ if (cp [i + 2 ] == '\0' )
270
+ return -1 ;
271
+ if (flags & REFNAME_DOT_COMPONENT ) {
272
+ /* skip to just after the /. */
273
+ cp += i + 2 ;
274
+ break ;
275
+ }
276
+ return -1 ;
277
+ } else if (cp [i + 1 ] == '/' || cp [i + 1 ] == '\0' )
278
+ return -1 ;
279
+ break ;
280
+ case 4 :
281
+ if (cp [i + 1 ] == '.' || cp [i + 1 ] == '\0' )
282
+ return -1 ;
283
+ /* .lock as end-of-component or end-of-string */
284
+ if ((!strncmp (cp + i , ".lock" , 5 ))
285
+ && (cp [i + 5 ] == '/' || cp [i + 5 ] == 0 ))
286
+ return -1 ;
287
+ cp += 1 ;
288
+ break ;
289
+ case 6 :
290
+ if (((cp == refname + i ) || cp [i - 1 ] == '/' )
291
+ && (cp [i + 1 ] == '/' || cp [i + 1 ] == 0 ))
292
+ if (flags & REFNAME_REFSPEC_PATTERN ) {
293
+ flags &= ~REFNAME_REFSPEC_PATTERN ;
294
+ /* restart after the * */
295
+ cp += i + 1 ;
296
+ continue ;
297
+ }
298
+ /* fall-through */
299
+ case 7 :
300
+ return -1 ;
301
+ }
302
+ } else
303
+ cp += SSE_VECTOR_BYTES ;
304
+ }
305
+ }
306
+
307
+ #else
308
+
309
/*
 * Non-vectorized check_refname_format(), compiled on the #else branch
 * of the SSE conditional: forwards refname and flags unchanged to
 * check_refname_format_bytewise() and returns its result.
 */
+ int check_refname_format (const char * refname , int flags )
310
+ {
311
+ return check_refname_format_bytewise (refname , flags );
312
+ }
313
+
314
+ #endif
315
+
118
316
struct ref_entry ;
119
317
120
318
/*
0 commit comments