7
7
8
8
/*
9
9
* How to handle various characters in refnames:
10
- * This table is used by both the SIMD and non-SIMD code. It has
11
- * some cases that are only useful for the SIMD; these are handled
12
- * equivalently to the listed disposition in the non-SIMD code.
13
10
* 0: An acceptable character for refs
14
- * 1: @, look for a following { to reject @{ in refs (SIMD or = 0)
15
- * 2: \0: End-of-component and string
16
- * 3: /: End-of-component (SIMD or = 2)
17
- * 4: ., look for a preceding . to reject .. in refs
18
- * 5: {, look for a preceding @ to reject @{ in refs
19
- * 6: *, usually a bad character except, once as a wildcard (SIMD or = 7)
20
- * 7: A bad character except * (see check_refname_component below)
11
+ * 1: End-of-component
12
+ * 2: ., look for a preceding . to reject .. in refs
13
+ * 3: {, look for a preceding @ to reject @{ in refs
14
+ * 4: A bad character: ASCII control characters (including DEL), SP, "~", "^", ":", "?", "*", "[" or "\"
21
15
*/
22
16
static unsigned char refname_disposition [256 ] = {
23
- 2 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 ,
24
- 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 , 7 ,
25
- 7 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 6 , 0 , 0 , 0 , 4 , 3 ,
26
- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 7 , 0 , 0 , 0 , 0 , 7 ,
27
- 1 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
28
- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 7 , 7 , 0 , 7 , 0 ,
17
+ 1 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 ,
18
+ 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 ,
19
+ 4 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 4 , 0 , 0 , 0 , 2 , 1 ,
20
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 4 , 0 , 0 , 0 , 0 , 4 ,
29
21
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
30
- 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 5 , 0 , 0 , 7 , 7
22
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 4 , 4 , 0 , 4 , 0 ,
23
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
24
+ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 3 , 0 , 0 , 4 , 4
31
25
};
32
26
33
27
/*
@@ -39,9 +33,8 @@ static unsigned char refname_disposition[256] = {
39
33
* - any path component of it begins with ".", or
40
34
* - it has double dots "..", or
41
35
* - it has an ASCII control character, "~", "^", ":" or SP, anywhere, or
42
- * - it has pattern-matching notation "*", "?", "[", anywhere, or
43
- * - it ends with a "/", or
44
- * - it ends with ".lock", or
36
+ * - it ends with a "/", or
37
+ * - it ends with ".lock", or
45
38
* - it contains a "\" (backslash)
46
39
*/
47
40
static int check_refname_component (const char * refname , int flags )
@@ -53,19 +46,17 @@ static int check_refname_component(const char *refname, int flags)
53
46
int ch = * cp & 255 ;
54
47
unsigned char disp = refname_disposition [ch ];
55
48
switch (disp ) {
56
- case 2 : /* fall-through */
57
- case 3 :
49
+ case 1 :
58
50
goto out ;
59
- case 4 :
51
+ case 2 :
60
52
if (last == '.' )
61
53
return -1 ; /* Refname contains "..". */
62
54
break ;
63
- case 5 :
55
+ case 3 :
64
56
if (last == '@' )
65
57
return -1 ; /* Refname contains "@{". */
66
58
break ;
67
- case 6 : /* fall-through */
68
- case 7 :
59
+ case 4 :
69
60
return -1 ;
70
61
}
71
62
last = ch ;
@@ -88,7 +79,7 @@ static int check_refname_component(const char *refname, int flags)
88
79
return cp - refname ;
89
80
}
90
81
91
- static int check_refname_format_bytewise (const char * refname , int flags )
82
+ int check_refname_format (const char * refname , int flags )
92
83
{
93
84
int component_len , component_count = 0 ;
94
85
@@ -124,195 +115,6 @@ static int check_refname_format_bytewise(const char *refname, int flags)
124
115
return 0 ;
125
116
}
126
117
127
- #if defined(__GNUC__ ) && defined(__x86_64__ )
128
- #define SSE_VECTOR_BYTES 16
129
-
130
- /* Vectorized version of check_refname_format. */
131
- int check_refname_format (const char * refname , int flags )
132
- {
133
- const char * cp = refname ;
134
-
135
- const __m128i dot = _mm_set1_epi8 ('.' );
136
- const __m128i at = _mm_set1_epi8 ('@' );
137
- const __m128i curly = _mm_set1_epi8 ('{' );
138
- const __m128i slash = _mm_set1_epi8 ('/' );
139
- const __m128i zero = _mm_set1_epi8 ('\000' );
140
- const __m128i el = _mm_set1_epi8 ('l' );
141
-
142
- /* below '*', all characters are forbidden or rare */
143
- const __m128i star_ub = _mm_set1_epi8 ('*' + 1 );
144
-
145
- const __m128i colon = _mm_set1_epi8 (':' );
146
- const __m128i question = _mm_set1_epi8 ('?' );
147
-
148
- /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
149
- const __m128i bracket_lb = _mm_set1_epi8 ('[' - 1 );
150
- const __m128i caret_ub = _mm_set1_epi8 ('^' + 1 );
151
-
152
- /* '~' and above are forbidden */
153
- const __m128i tilde_lb = _mm_set1_epi8 ('~' - 1 );
154
-
155
- int component_count = 0 ;
156
-
157
- if (refname [0 ] == 0 || refname [0 ] == '/' ) {
158
- /* entirely empty ref or initial ref component */
159
- return -1 ;
160
- }
161
-
162
- /*
163
- * Initial ref component of '.'; below we look for /. so we'll
164
- * miss this.
165
- */
166
- if (refname [0 ] == '.' ) {
167
- if (refname [1 ] == '/' || refname [1 ] == '\0' )
168
- return -1 ;
169
- if (!(flags & REFNAME_DOT_COMPONENT ))
170
- return -1 ;
171
- }
172
- while (1 ) {
173
- __m128i tmp , tmp1 , result ;
174
- uint64_t mask ;
175
-
176
- if ((uintptr_t ) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1 )
177
- /*
178
- * End-of-page; fall back to slow method for
179
- * this entire ref.
180
- */
181
- return check_refname_format_bytewise (refname , flags );
182
-
183
- tmp = _mm_loadu_si128 ((__m128i * )cp );
184
- tmp1 = _mm_loadu_si128 ((__m128i * )(cp + 1 ));
185
-
186
- /*
187
- * This range (note the lt) contains some
188
- * permissible-but-rare characters (including all
189
- * characters >= 128), which we handle later. It also
190
- * includes \000.
191
- */
192
- result = _mm_cmplt_epi8 (tmp , star_ub );
193
-
194
- result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , question ));
195
- result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , colon ));
196
-
197
- /* This range contains the permissible ] as bycatch */
198
- result = _mm_or_si128 (result , _mm_and_si128 (
199
- _mm_cmpgt_epi8 (tmp , bracket_lb ),
200
- _mm_cmplt_epi8 (tmp , caret_ub )));
201
-
202
- result = _mm_or_si128 (result , _mm_cmpgt_epi8 (tmp , tilde_lb ));
203
-
204
- /* .. */
205
- result = _mm_or_si128 (result , _mm_and_si128 (
206
- _mm_cmpeq_epi8 (tmp , dot ),
207
- _mm_cmpeq_epi8 (tmp1 , dot )));
208
- /* @{ */
209
- result = _mm_or_si128 (result , _mm_and_si128 (
210
- _mm_cmpeq_epi8 (tmp , at ),
211
- _mm_cmpeq_epi8 (tmp1 , curly )));
212
- /* // */
213
- result = _mm_or_si128 (result , _mm_and_si128 (
214
- _mm_cmpeq_epi8 (tmp , slash ),
215
- _mm_cmpeq_epi8 (tmp1 , slash )));
216
- /* trailing / */
217
- result = _mm_or_si128 (result , _mm_and_si128 (
218
- _mm_cmpeq_epi8 (tmp , slash ),
219
- _mm_cmpeq_epi8 (tmp1 , zero )));
220
- /* .l, beginning of .lock */
221
- result = _mm_or_si128 (result , _mm_and_si128 (
222
- _mm_cmpeq_epi8 (tmp , dot ),
223
- _mm_cmpeq_epi8 (tmp1 , el )));
224
- /*
225
- * Even though /. is not necessarily an error, we flag
226
- * it anyway. If we find it, we'll check if it's valid
227
- * and if so we'll advance just past it.
228
- */
229
- result = _mm_or_si128 (result , _mm_and_si128 (
230
- _mm_cmpeq_epi8 (tmp , slash ),
231
- _mm_cmpeq_epi8 (tmp1 , dot )));
232
-
233
- mask = _mm_movemask_epi8 (result );
234
- if (mask ) {
235
- /*
236
- * We've found either end-of-string, or some
237
- * probably-bad character or substring.
238
- */
239
- int i = __builtin_ctz (mask );
240
- switch (refname_disposition [cp [i ] & 255 ]) {
241
- case 0 : /* fall-through */
242
- case 5 :
243
- /*
244
- * bycatch: a good character that's in
245
- * one of the ranges of mostly-forbidden
246
- * characters
247
- */
248
- cp += i + 1 ;
249
- break ;
250
- case 1 :
251
- if (cp [i + 1 ] == '{' )
252
- return -1 ;
253
- cp += i + 1 ;
254
- break ;
255
- case 2 :
256
- if (!(flags & REFNAME_ALLOW_ONELEVEL )
257
- && !component_count && !strchr (refname , '/' ))
258
- /* Refname has only one component. */
259
- return -1 ;
260
- return 0 ;
261
- case 3 :
262
- component_count ++ ;
263
- /*
264
- * Even if leading dots are allowed, don't
265
- * allow "." as a component (".." is
266
- * prevented by case 4 below).
267
- */
268
- if (cp [i + 1 ] == '.' ) {
269
- if (cp [i + 2 ] == '\0' )
270
- return -1 ;
271
- if (flags & REFNAME_DOT_COMPONENT ) {
272
- /* skip to just after the /. */
273
- cp += i + 2 ;
274
- break ;
275
- }
276
- return -1 ;
277
- } else if (cp [i + 1 ] == '/' || cp [i + 1 ] == '\0' )
278
- return -1 ;
279
- break ;
280
- case 4 :
281
- if (cp [i + 1 ] == '.' || cp [i + 1 ] == '\0' )
282
- return -1 ;
283
- /* .lock as end-of-component or end-of-string */
284
- if ((!strncmp (cp + i , ".lock" , 5 ))
285
- && (cp [i + 5 ] == '/' || cp [i + 5 ] == 0 ))
286
- return -1 ;
287
- cp += 1 ;
288
- break ;
289
- case 6 :
290
- if (((cp == refname + i ) || cp [i - 1 ] == '/' )
291
- && (cp [i + 1 ] == '/' || cp [i + 1 ] == 0 ))
292
- if (flags & REFNAME_REFSPEC_PATTERN ) {
293
- flags &= ~REFNAME_REFSPEC_PATTERN ;
294
- /* restart after the * */
295
- cp += i + 1 ;
296
- continue ;
297
- }
298
- /* fall-through */
299
- case 7 :
300
- return -1 ;
301
- }
302
- } else
303
- cp += SSE_VECTOR_BYTES ;
304
- }
305
- }
306
-
307
- #else
308
-
309
- int check_refname_format (const char * refname , int flags )
310
- {
311
- return check_refname_format_bytewise (refname , flags );
312
- }
313
-
314
- #endif
315
-
316
118
struct ref_entry ;
317
119
318
120
/*
0 commit comments