Skip to content
This repository was archived by the owner on Nov 9, 2017. It is now read-only.

Commit 745224e

Browse files
dturner-tw authored and gitster committed
refs.c: SSE2 optimizations for check_refname_component
Optimize check_refname_component using SSE2 on x86_64. "git rev-parse HEAD" is a good test case for this, since it does almost nothing except parse refs. For one particular repo with about 60k refs, almost all packed, the timings are: look-up table: 29 ms; SSE2: 23 ms. This cuts about 20% off of the runtime. Ondřej Bílka <[email protected]> suggested an SSE2 approach to the substring searches, which netted a speed boost over the SSE4.2 code I had initially written. Signed-off-by: David Turner <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent dde8a90 commit 745224e

File tree

4 files changed

+250
-18
lines changed

4 files changed

+250
-18
lines changed

git-compat-util.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,17 @@ void git_qsort(void *base, size_t nmemb, size_t size,
677677
#endif
678678
#endif
679679

680+
#if defined(__GNUC__) && defined(__x86_64__)
681+
#include <emmintrin.h>
682+
/*
683+
* This is the system memory page size; it's used so that we can read
684+
* outside the bounds of an allocation without segfaulting.
685+
*/
686+
#ifndef PAGE_SIZE
687+
#define PAGE_SIZE 4096
688+
#endif
689+
#endif
690+
680691
#ifdef UNRELIABLE_FSTAT
681692
#define fstat_is_reliable() 0
682693
#else

refs.c

Lines changed: 216 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,27 @@
77

88
/*
99
* How to handle various characters in refnames:
10+
* This table is used by both the SIMD and non-SIMD code. It has
11+
* some cases that are only useful for the SIMD; these are handled
12+
* equivalently to the listed disposition in the non-SIMD code.
1013
* 0: An acceptable character for refs
11-
* 1: End-of-component
12-
* 2: ., look for a preceding . to reject .. in refs
13-
* 3: {, look for a preceding @ to reject @{ in refs
14-
* 4: A bad character: ASCII control characters, "~", "^", ":" or SP
14+
* 1: @, look for a following { to reject @{ in refs (SIMD or = 0)
15+
* 2: \0: End-of-component and string
16+
* 3: /: End-of-component (SIMD or = 2)
17+
* 4: ., look for a preceding . to reject .. in refs
18+
* 5: {, look for a preceding @ to reject @{ in refs
19+
* 6: *, usually a bad character, except once as a wildcard (SIMD or = 7)
20+
* 7: A bad character except * (see check_refname_component below)
1521
*/
1622
static unsigned char refname_disposition[256] = {
17-
1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
18-
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
19-
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
20-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
23+
2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
24+
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
25+
7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 4, 3,
26+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7,
27+
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 7, 0,
2129
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
23-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
30+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 7, 7
2531
};
2632

2733
/*
@@ -33,8 +39,9 @@ static unsigned char refname_disposition[256] = {
3339
* - any path component of it begins with ".", or
3440
* - it has double dots "..", or
3541
* - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
36-
* - it ends with a "/".
37-
* - it ends with ".lock"
42+
* - it has pattern-matching notation "*", "?", "[", anywhere, or
43+
* - it ends with a "/", or
44+
* - it ends with ".lock", or
3845
* - it contains a "\" (backslash)
3946
*/
4047
static int check_refname_component(const char *refname, int flags)
@@ -46,17 +53,19 @@ static int check_refname_component(const char *refname, int flags)
4653
int ch = *cp & 255;
4754
unsigned char disp = refname_disposition[ch];
4855
switch (disp) {
49-
case 1:
56+
case 2: /* fall-through */
57+
case 3:
5058
goto out;
51-
case 2:
59+
case 4:
5260
if (last == '.')
5361
return -1; /* Refname contains "..". */
5462
break;
55-
case 3:
63+
case 5:
5664
if (last == '@')
5765
return -1; /* Refname contains "@{". */
5866
break;
59-
case 4:
67+
case 6: /* fall-through */
68+
case 7:
6069
return -1;
6170
}
6271
last = ch;
@@ -79,7 +88,7 @@ static int check_refname_component(const char *refname, int flags)
7988
return cp - refname;
8089
}
8190

82-
int check_refname_format(const char *refname, int flags)
91+
static int check_refname_format_bytewise(const char *refname, int flags)
8392
{
8493
int component_len, component_count = 0;
8594

@@ -115,6 +124,195 @@ int check_refname_format(const char *refname, int flags)
115124
return 0;
116125
}
117126

127+
#if defined(__GNUC__) && defined(__x86_64__)
128+
#define SSE_VECTOR_BYTES 16
129+
130+
/* Vectorized version of check_refname_format. */
131+
int check_refname_format(const char *refname, int flags)
132+
{
133+
const char *cp = refname;
134+
135+
const __m128i dot = _mm_set1_epi8('.');
136+
const __m128i at = _mm_set1_epi8('@');
137+
const __m128i curly = _mm_set1_epi8('{');
138+
const __m128i slash = _mm_set1_epi8('/');
139+
const __m128i zero = _mm_set1_epi8('\000');
140+
const __m128i el = _mm_set1_epi8('l');
141+
142+
/* below '*', all characters are forbidden or rare */
143+
const __m128i star_ub = _mm_set1_epi8('*' + 1);
144+
145+
const __m128i colon = _mm_set1_epi8(':');
146+
const __m128i question = _mm_set1_epi8('?');
147+
148+
/* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
149+
const __m128i bracket_lb = _mm_set1_epi8('[' - 1);
150+
const __m128i caret_ub = _mm_set1_epi8('^' + 1);
151+
152+
/* '~' and above are forbidden */
153+
const __m128i tilde_lb = _mm_set1_epi8('~' - 1);
154+
155+
int component_count = 0;
156+
157+
if (refname[0] == 0 || refname[0] == '/') {
158+
/* entirely empty ref or initial ref component */
159+
return -1;
160+
}
161+
162+
/*
163+
* Initial ref component of '.'; below we look for /. so we'll
164+
* miss this.
165+
*/
166+
if (refname[0] == '.') {
167+
if (refname[1] == '/' || refname[1] == '\0')
168+
return -1;
169+
if (!(flags & REFNAME_DOT_COMPONENT))
170+
return -1;
171+
}
172+
while(1) {
173+
__m128i tmp, tmp1, result;
174+
uint64_t mask;
175+
176+
if ((uintptr_t) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1)
177+
/*
178+
* End-of-page; fall back to slow method for
179+
* this entire ref.
180+
*/
181+
return check_refname_format_bytewise(refname, flags);
182+
183+
tmp = _mm_loadu_si128((__m128i *)cp);
184+
tmp1 = _mm_loadu_si128((__m128i *)(cp + 1));
185+
186+
/*
187+
* This range (note the lt) contains some
188+
* permissible-but-rare characters (including all
189+
* characters >= 128), which we handle later. It also
190+
* includes \000.
191+
*/
192+
result = _mm_cmplt_epi8(tmp, star_ub);
193+
194+
result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, question));
195+
result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, colon));
196+
197+
/* This range contains the permissible ] as bycatch */
198+
result = _mm_or_si128(result, _mm_and_si128(
199+
_mm_cmpgt_epi8(tmp, bracket_lb),
200+
_mm_cmplt_epi8(tmp, caret_ub)));
201+
202+
result = _mm_or_si128(result, _mm_cmpgt_epi8(tmp, tilde_lb));
203+
204+
/* .. */
205+
result = _mm_or_si128(result, _mm_and_si128(
206+
_mm_cmpeq_epi8(tmp, dot),
207+
_mm_cmpeq_epi8(tmp1, dot)));
208+
/* @{ */
209+
result = _mm_or_si128(result, _mm_and_si128(
210+
_mm_cmpeq_epi8(tmp, at),
211+
_mm_cmpeq_epi8(tmp1, curly)));
212+
/* // */
213+
result = _mm_or_si128(result, _mm_and_si128(
214+
_mm_cmpeq_epi8(tmp, slash),
215+
_mm_cmpeq_epi8(tmp1, slash)));
216+
/* trailing / */
217+
result = _mm_or_si128(result, _mm_and_si128(
218+
_mm_cmpeq_epi8(tmp, slash),
219+
_mm_cmpeq_epi8(tmp1, zero)));
220+
/* .l, beginning of .lock */
221+
result = _mm_or_si128(result, _mm_and_si128(
222+
_mm_cmpeq_epi8(tmp, dot),
223+
_mm_cmpeq_epi8(tmp1, el)));
224+
/*
225+
* Even though /. is not necessarily an error, we flag
226+
* it anyway. If we find it, we'll check if it's valid
227+
* and if so we'll advance just past it.
228+
*/
229+
result = _mm_or_si128(result, _mm_and_si128(
230+
_mm_cmpeq_epi8(tmp, slash),
231+
_mm_cmpeq_epi8(tmp1, dot)));
232+
233+
mask = _mm_movemask_epi8(result);
234+
if (mask) {
235+
/*
236+
* We've found either end-of-string, or some
237+
* probably-bad character or substring.
238+
*/
239+
int i = __builtin_ctz(mask);
240+
switch (refname_disposition[cp[i] & 255]) {
241+
case 0: /* fall-through */
242+
case 5:
243+
/*
244+
* bycatch: a good character that's in
245+
* one of the ranges of mostly-forbidden
246+
* characters
247+
*/
248+
cp += i + 1;
249+
break;
250+
case 1:
251+
if (cp[i + 1] == '{')
252+
return -1;
253+
cp += i + 1;
254+
break;
255+
case 2:
256+
if (!(flags & REFNAME_ALLOW_ONELEVEL)
257+
&& !component_count && !strchr(refname, '/'))
258+
/* Refname has only one component. */
259+
return -1;
260+
return 0;
261+
case 3:
262+
component_count ++;
263+
/*
264+
* Even if leading dots are allowed, don't
265+
* allow "." as a component (".." is
266+
* prevented by case 4 below).
267+
*/
268+
if (cp[i + 1] == '.') {
269+
if (cp[i + 2] == '\0')
270+
return -1;
271+
if (flags & REFNAME_DOT_COMPONENT) {
272+
/* skip to just after the /. */
273+
cp += i + 2;
274+
break;
275+
}
276+
return -1;
277+
} else if (cp[i + 1] == '/' || cp[i + 1] == '\0')
278+
return -1;
279+
break;
280+
case 4:
281+
if (cp[i + 1] == '.' || cp[i + 1] == '\0')
282+
return -1;
283+
/* .lock as end-of-component or end-of-string */
284+
if ((!strncmp(cp + i, ".lock", 5))
285+
&& (cp[i + 5] == '/' || cp[i + 5] == 0))
286+
return -1;
287+
cp += 1;
288+
break;
289+
case 6:
290+
if (((cp == refname + i) || cp[i - 1] == '/')
291+
&& (cp[i + 1] == '/' || cp[i + 1] == 0))
292+
if (flags & REFNAME_REFSPEC_PATTERN) {
293+
flags &= ~REFNAME_REFSPEC_PATTERN;
294+
/* restart after the * */
295+
cp += i + 1;
296+
continue;
297+
}
298+
/* fall-through */
299+
case 7:
300+
return -1;
301+
}
302+
} else
303+
cp += SSE_VECTOR_BYTES;
304+
}
305+
}
306+
307+
#else
308+
309+
/*
 * Definition used when the vectorized variant is not compiled in:
 * hand the refname and flags to the bytewise checker and pass its
 * verdict back to the caller unchanged.
 */
int check_refname_format (const char *refname, int flags)
{
	const int verdict = check_refname_format_bytewise(refname, flags);

	return verdict;
}
313+
314+
#endif
315+
118316
struct ref_entry;
119317

120318
/*

t/t1402-check-ref-format.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ valid_ref "$(printf 'heads/fu\303\237')"
6464
invalid_ref 'heads/*foo/bar' --refspec-pattern
6565
invalid_ref 'heads/foo*/bar' --refspec-pattern
6666
invalid_ref 'heads/f*o/bar' --refspec-pattern
67+
invalid_ref 'heads/foo*//bar' --refspec-pattern
6768

6869
ref='foo'
6970
invalid_ref "$ref"
@@ -128,6 +129,20 @@ valid_ref NOT_MINGW "$ref" '--allow-onelevel --normalize'
128129
invalid_ref NOT_MINGW "$ref" '--refspec-pattern --normalize'
129130
valid_ref NOT_MINGW "$ref" '--refspec-pattern --allow-onelevel --normalize'
130131

132+
133+
valid_ref 'refs/heads/a-very-long-refname'
134+
invalid_ref 'refs/heads/.a-very-long-refname'
135+
invalid_ref 'refs/heads/abcdefgh0123..'
136+
invalid_ref 'refs/heads/abcdefgh01234..'
137+
invalid_ref 'refs/heads/abcdefgh012345..'
138+
invalid_ref 'refs/heads/abcdefgh0123456..'
139+
invalid_ref 'refs/heads/abcdefgh01234567..'
140+
valid_ref 'refs/heads/abcdefgh0123.a'
141+
valid_ref 'refs/heads/abcdefgh01234.a'
142+
valid_ref 'refs/heads/abcdefgh012345.a'
143+
valid_ref 'refs/heads/abcdefgh0123456.a'
144+
valid_ref 'refs/heads/abcdefgh01234567.a'
145+
131146
test_expect_success "check-ref-format --branch @{-1}" '
132147
T=$(git write-tree) &&
133148
sha1=$(echo A | git commit-tree $T) &&

t/valgrind/default.supp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,11 @@
4949
Memcheck:Addr4
5050
fun:copy_ref
5151
}
52+
{
53+
ignore-sse-check_refname_format
54+
Memcheck:Addr8
55+
fun:check_refname_format
56+
fun:cmd_check_ref_format
57+
fun:handle_builtin
58+
fun:main
59+
}

0 commit comments

Comments
 (0)