77
/*
 * How to handle various characters in refnames.
 *
 * This table is used by both the SIMD and non-SIMD code. It has
 * some cases that are only useful for the SIMD code; the non-SIMD
 * code handles those the same way as the disposition named in
 * parentheses.
 *
 * 0: An acceptable character for refs
 * 1: @, look for a following { to reject @{ in refs (SIMD or = 0)
 * 2: \0: End-of-component and string
 * 3: /: End-of-component (SIMD or = 2)
 * 4: ., look for a preceding . to reject .. in refs
 * 5: {, look for a preceding @ to reject @{ in refs
 * 6: *, usually a bad character except, once as a wildcard (SIMD or = 7)
 * 7: A bad character except * (see check_refname_component below)
 *
 * Only the first 128 entries are spelled out; the remaining entries
 * (bytes 128..255) are implicitly zero, i.e. acceptable.
 */
static unsigned char refname_disposition[256] = {
	/* 0x00: NUL, then control characters */
	2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	/* 0x10: control characters */
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	/* 0x20: SP ... '*' ... '.' '/' */
	7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 4, 3,
	/* 0x30: digits ... ':' ... '?' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7,
	/* 0x40: '@', then 'A'..'O' */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50: 'P'..'Z', '[' '\\' ']' '^' '_' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 7, 0,
	/* 0x60: '`', then 'a'..'o' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70: 'p'..'z', '{' '|' '}' '~' DEL */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 7
};
2632
2733/*
@@ -33,8 +39,9 @@ static unsigned char refname_disposition[256] = {
3339 * - any path component of it begins with ".", or
3440 * - it has double dots "..", or
3541 * - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
36- * - it ends with a "/".
37- * - it ends with ".lock"
42+ * - it has pattern-matching notation "*", "?", "[", anywhere, or
43+ * - it ends with a "/", or
44+ * - it ends with ".lock", or
3845 * - it contains a "\" (backslash)
3946 */
4047static int check_refname_component (const char * refname , int flags )
@@ -46,17 +53,19 @@ static int check_refname_component(const char *refname, int flags)
4653 int ch = * cp & 255 ;
4754 unsigned char disp = refname_disposition [ch ];
4855 switch (disp ) {
49- case 1 :
56+ case 2 : /* fall-through */
57+ case 3 :
5058 goto out ;
51- case 2 :
59+ case 4 :
5260 if (last == '.' )
5361 return -1 ; /* Refname contains "..". */
5462 break ;
55- case 3 :
63+ case 5 :
5664 if (last == '@' )
5765 return -1 ; /* Refname contains "@{". */
5866 break ;
59- case 4 :
67+ case 6 : /* fall-through */
68+ case 7 :
6069 return -1 ;
6170 }
6271 last = ch ;
@@ -79,7 +88,7 @@ static int check_refname_component(const char *refname, int flags)
7988 return cp - refname ;
8089}
8190
82- int check_refname_format (const char * refname , int flags )
91+ static int check_refname_format_bytewise (const char * refname , int flags )
8392{
8493 int component_len , component_count = 0 ;
8594
@@ -115,6 +124,195 @@ int check_refname_format(const char *refname, int flags)
115124 return 0 ;
116125}
117126
127+ #if defined(__GNUC__ ) && defined(__x86_64__ )
128+ #define SSE_VECTOR_BYTES 16
129+
130+ /* Vectorized version of check_refname_format. */
131+ int check_refname_format (const char * refname , int flags )
132+ {
133+ const char * cp = refname ;
134+
135+ const __m128i dot = _mm_set1_epi8 ('.' );
136+ const __m128i at = _mm_set1_epi8 ('@' );
137+ const __m128i curly = _mm_set1_epi8 ('{' );
138+ const __m128i slash = _mm_set1_epi8 ('/' );
139+ const __m128i zero = _mm_set1_epi8 ('\000' );
140+ const __m128i el = _mm_set1_epi8 ('l' );
141+
142+ /* below '*', all characters are forbidden or rare */
143+ const __m128i star_ub = _mm_set1_epi8 ('*' + 1 );
144+
145+ const __m128i colon = _mm_set1_epi8 (':' );
146+ const __m128i question = _mm_set1_epi8 ('?' );
147+
148+ /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
149+ const __m128i bracket_lb = _mm_set1_epi8 ('[' - 1 );
150+ const __m128i caret_ub = _mm_set1_epi8 ('^' + 1 );
151+
152+ /* '~' and above are forbidden */
153+ const __m128i tilde_lb = _mm_set1_epi8 ('~' - 1 );
154+
155+ int component_count = 0 ;
156+
157+ if (refname [0 ] == 0 || refname [0 ] == '/' ) {
158+ /* entirely empty ref or initial ref component */
159+ return -1 ;
160+ }
161+
162+ /*
163+ * Initial ref component of '.'; below we look for /. so we'll
164+ * miss this.
165+ */
166+ if (refname [0 ] == '.' ) {
167+ if (refname [1 ] == '/' || refname [1 ] == '\0' )
168+ return -1 ;
169+ if (!(flags & REFNAME_DOT_COMPONENT ))
170+ return -1 ;
171+ }
172+ while (1 ) {
173+ __m128i tmp , tmp1 , result ;
174+ uint64_t mask ;
175+
176+ if ((uintptr_t ) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1 )
177+ /*
178+ * End-of-page; fall back to slow method for
179+ * this entire ref.
180+ */
181+ return check_refname_format_bytewise (refname , flags );
182+
183+ tmp = _mm_loadu_si128 ((__m128i * )cp );
184+ tmp1 = _mm_loadu_si128 ((__m128i * )(cp + 1 ));
185+
186+ /*
187+ * This range (note the lt) contains some
188+ * permissible-but-rare characters (including all
189+ * characters >= 128), which we handle later. It also
190+ * includes \000.
191+ */
192+ result = _mm_cmplt_epi8 (tmp , star_ub );
193+
194+ result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , question ));
195+ result = _mm_or_si128 (result , _mm_cmpeq_epi8 (tmp , colon ));
196+
197+ /* This range contains the permissible ] as bycatch */
198+ result = _mm_or_si128 (result , _mm_and_si128 (
199+ _mm_cmpgt_epi8 (tmp , bracket_lb ),
200+ _mm_cmplt_epi8 (tmp , caret_ub )));
201+
202+ result = _mm_or_si128 (result , _mm_cmpgt_epi8 (tmp , tilde_lb ));
203+
204+ /* .. */
205+ result = _mm_or_si128 (result , _mm_and_si128 (
206+ _mm_cmpeq_epi8 (tmp , dot ),
207+ _mm_cmpeq_epi8 (tmp1 , dot )));
208+ /* @{ */
209+ result = _mm_or_si128 (result , _mm_and_si128 (
210+ _mm_cmpeq_epi8 (tmp , at ),
211+ _mm_cmpeq_epi8 (tmp1 , curly )));
212+ /* // */
213+ result = _mm_or_si128 (result , _mm_and_si128 (
214+ _mm_cmpeq_epi8 (tmp , slash ),
215+ _mm_cmpeq_epi8 (tmp1 , slash )));
216+ /* trailing / */
217+ result = _mm_or_si128 (result , _mm_and_si128 (
218+ _mm_cmpeq_epi8 (tmp , slash ),
219+ _mm_cmpeq_epi8 (tmp1 , zero )));
220+ /* .l, beginning of .lock */
221+ result = _mm_or_si128 (result , _mm_and_si128 (
222+ _mm_cmpeq_epi8 (tmp , dot ),
223+ _mm_cmpeq_epi8 (tmp1 , el )));
224+ /*
225+ * Even though /. is not necessarily an error, we flag
226+ * it anyway. If we find it, we'll check if it's valid
227+ * and if so we'll advance just past it.
228+ */
229+ result = _mm_or_si128 (result , _mm_and_si128 (
230+ _mm_cmpeq_epi8 (tmp , slash ),
231+ _mm_cmpeq_epi8 (tmp1 , dot )));
232+
233+ mask = _mm_movemask_epi8 (result );
234+ if (mask ) {
235+ /*
236+ * We've found either end-of-string, or some
237+ * probably-bad character or substring.
238+ */
239+ int i = __builtin_ctz (mask );
240+ switch (refname_disposition [cp [i ] & 255 ]) {
241+ case 0 : /* fall-through */
242+ case 5 :
243+ /*
244+ * bycatch: a good character that's in
245+ * one of the ranges of mostly-forbidden
246+ * characters
247+ */
248+ cp += i + 1 ;
249+ break ;
250+ case 1 :
251+ if (cp [i + 1 ] == '{' )
252+ return -1 ;
253+ cp += i + 1 ;
254+ break ;
255+ case 2 :
256+ if (!(flags & REFNAME_ALLOW_ONELEVEL )
257+ && !component_count && !strchr (refname , '/' ))
258+ /* Refname has only one component. */
259+ return -1 ;
260+ return 0 ;
261+ case 3 :
262+ component_count ++ ;
263+ /*
264+ * Even if leading dots are allowed, don't
265+ * allow "." as a component (".." is
266+ * prevented by case 4 below).
267+ */
268+ if (cp [i + 1 ] == '.' ) {
269+ if (cp [i + 2 ] == '\0' )
270+ return -1 ;
271+ if (flags & REFNAME_DOT_COMPONENT ) {
272+ /* skip to just after the /. */
273+ cp += i + 2 ;
274+ break ;
275+ }
276+ return -1 ;
277+ } else if (cp [i + 1 ] == '/' || cp [i + 1 ] == '\0' )
278+ return -1 ;
279+ break ;
280+ case 4 :
281+ if (cp [i + 1 ] == '.' || cp [i + 1 ] == '\0' )
282+ return -1 ;
283+ /* .lock as end-of-component or end-of-string */
284+ if ((!strncmp (cp + i , ".lock" , 5 ))
285+ && (cp [i + 5 ] == '/' || cp [i + 5 ] == 0 ))
286+ return -1 ;
287+ cp += 1 ;
288+ break ;
289+ case 6 :
290+ if (((cp == refname + i ) || cp [i - 1 ] == '/' )
291+ && (cp [i + 1 ] == '/' || cp [i + 1 ] == 0 ))
292+ if (flags & REFNAME_REFSPEC_PATTERN ) {
293+ flags &= ~REFNAME_REFSPEC_PATTERN ;
294+ /* restart after the * */
295+ cp += i + 1 ;
296+ continue ;
297+ }
298+ /* fall-through */
299+ case 7 :
300+ return -1 ;
301+ }
302+ } else
303+ cp += SSE_VECTOR_BYTES ;
304+ }
305+ }
306+
307+ #else
308+
309+ int check_refname_format (const char * refname , int flags )
310+ {
311+ return check_refname_format_bytewise (refname , flags );
312+ }
313+
314+ #endif
315+
118316struct ref_entry ;
119317
120318/*
0 commit comments