Skip to content

Commit 7b9de66

Browse files
randall77 authored and gopherbot committed
unicode/utf8: skip ahead during ascii runs in Valid/ValidString
When we see an ASCII character, we will probably see many. Grab & check
increasingly large chunks of the string for ASCII-only-ness.

Also redo some of the non-ASCII code to make it more optimizer friendly.

goos: linux
goarch: amd64
pkg: unicode/utf8
cpu: 12th Gen Intel(R) Core(TM) i7-12700
                                 │     base      │                 exp                 │
                                 │    sec/op     │    sec/op     vs base               │
ValidTenASCIIChars-20               3.596n ± 3%    2.522n ± 1%  -29.86% (p=0.000 n=10)
Valid100KASCIIChars-20              6.094µ ± 2%    2.115µ ± 1%  -65.29% (p=0.000 n=10)
ValidTenJapaneseChars-20            21.02n ± 0%    18.61n ± 2%  -11.44% (p=0.000 n=10)
ValidLongMostlyASCII-20            51.774µ ± 0%    3.836µ ± 1%  -92.59% (p=0.000 n=10)
ValidLongJapanese-20               102.40µ ± 1%    50.95µ ± 1%  -50.24% (p=0.000 n=10)
ValidStringTenASCIIChars-20         2.640n ± 3%    2.526n ± 1%   -4.34% (p=0.000 n=10)
ValidString100KASCIIChars-20        5.585µ ± 7%    2.118µ ± 1%  -62.07% (p=0.000 n=10)
ValidStringTenJapaneseChars-20      21.29n ± 2%    18.67n ± 1%  -12.31% (p=0.000 n=10)
ValidStringLongMostlyASCII-20      52.431µ ± 1%    3.841µ ± 0%  -92.67% (p=0.000 n=10)
ValidStringLongJapanese-20         102.66µ ± 1%    50.90µ ± 1%  -50.42% (p=0.000 n=10)
geomean                             1.152µ         454.8n       -60.53%

This is an attempt to see if we can get enough performance that we don't
need to consider assembly like that in CL 681695.

Change-Id: I8250feb797a6b4e7d335c23929f6e3acc8b24840
Reviewed-on: https://go-review.googlesource.com/c/go/+/682778
Reviewed-by: Cuong Manh Le <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Auto-Submit: Keith Randall <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 076eae4 commit 7b9de66

File tree

2 files changed

+93
-71
lines changed

2 files changed

+93
-71
lines changed

src/unicode/utf8/utf8.go

Lines changed: 83 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -430,99 +430,111 @@ func RuneCountInString(s string) (n int) {
430430
// bits set to 10.
431431
func RuneStart(b byte) bool { return b&0xC0 != 0x80 }
432432

433+
// ptrSize is the size of a uintptr in bytes. The shift count
// ^uintptr(0)>>63 is 1 only when uintptr has a bit 63, so the result
// is 8 for a 64-bit uintptr and 4 for a 32-bit one.
const ptrSize = 4 << (^uintptr(0) >> 63)
434+
// hiBits has the 0x80 bit set in each of its ptrSize bytes
// (the 64-bit pattern is shifted down by 32 when ptrSize is 4).
// A word ANDed with hiBits is zero exactly when every byte in the
// word is below 0x80.
const hiBits = 0x8080808080808080 >> (64 - 8*ptrSize)
435+
436+
// word returns the first ptrSize bytes of s packed into a uintptr,
// with s[0] in the least significant byte.
// The caller must ensure len(s) >= ptrSize: the body indexes
// s[0] through s[ptrSize-1] without checking the length itself.
func word[T string | []byte](s T) uintptr {
437+
// 4-byte uintptr: combine the first four bytes.
if ptrSize == 4 {
438+
return uintptr(s[0]) | uintptr(s[1])<<8 | uintptr(s[2])<<16 | uintptr(s[3])<<24
439+
}
440+
// Otherwise combine eight bytes as a uint64 and convert to uintptr.
return uintptr(uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56)
441+
}
442+
433443
// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
434444
func Valid(p []byte) bool {
435445
// This optimization avoids the need to recompute the capacity
436-
// when generating code for p[8:], bringing it to parity with
446+
// when generating code for slicing p, bringing it to parity with
437447
// ValidString, which was 20% faster on long ASCII strings.
438448
p = p[:len(p):len(p)]
439449

440-
// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
441-
for len(p) >= 8 {
442-
// Combining two 32 bit loads allows the same code to be used
443-
// for 32 and 64 bit platforms.
444-
// The compiler can generate a 32bit load for first32 and second32
445-
// on many platforms. See test/codegen/memcombine.go.
446-
first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
447-
second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24
448-
if (first32|second32)&0x80808080 != 0 {
449-
// Found a non ASCII byte (>= RuneSelf).
450-
break
451-
}
452-
p = p[8:]
453-
}
454-
n := len(p)
455-
for i := 0; i < n; {
456-
pi := p[i]
457-
if pi < RuneSelf {
458-
i++
450+
// Consume p from the front: each iteration strips either a run of
// ASCII bytes or exactly one multi-byte sequence.
for len(p) > 0 {
451+
p0 := p[0]
452+
if p0 < RuneSelf {
453+
p = p[1:]
454+
// If there's one ASCII byte, there are probably more.
455+
// Advance quickly through ASCII-only data.
456+
// Note: using > instead of >= here is intentional. That avoids
457+
// needing pointing-past-the-end fixup on the slice operations.
458+
if len(p) > ptrSize && word(p)&hiBits == 0 {
459+
p = p[ptrSize:]
460+
// One ASCII-only word was skipped; probe two words at once next.
if len(p) > 2*ptrSize && (word(p)|word(p[ptrSize:]))&hiBits == 0 {
461+
p = p[2*ptrSize:]
462+
// Keep skipping four words per iteration while all of them are ASCII-only.
for len(p) > 4*ptrSize && ((word(p)|word(p[ptrSize:]))|(word(p[2*ptrSize:])|word(p[3*ptrSize:])))&hiBits == 0 {
463+
p = p[4*ptrSize:]
464+
}
465+
}
466+
}
459467
continue
460468
}
461-
x := first[pi]
462-
if x == xx {
463-
return false // Illegal starter byte.
464-
}
469+
// p0 >= RuneSelf here. The low three bits of the table entry are used
// as the sequence length and the high nibble selects the accepted
// range for the second byte.
x := first[p0]
465470
size := int(x & 7)
466-
if i+size > n {
467-
return false // Short or invalid.
468-
}
469471
accept := acceptRanges[x>>4]
470-
if c := p[i+1]; c < accept.lo || accept.hi < c {
471-
return false
472-
} else if size == 2 {
473-
} else if c := p[i+2]; c < locb || hicb < c {
474-
return false
475-
} else if size == 3 {
476-
} else if c := p[i+3]; c < locb || hicb < c {
477-
return false
472+
// Each case checks the remaining length first, so the byte reads that
// follow in the same condition are only evaluated when in range.
switch size {
473+
case 2:
474+
if len(p) < 2 || p[1] < accept.lo || accept.hi < p[1] {
475+
return false
476+
}
477+
p = p[2:]
478+
case 3:
479+
if len(p) < 3 || p[1] < accept.lo || accept.hi < p[1] || p[2] < locb || hicb < p[2] {
480+
return false
481+
}
482+
p = p[3:]
483+
case 4:
484+
if len(p) < 4 || p[1] < accept.lo || accept.hi < p[1] || p[2] < locb || hicb < p[2] || p[3] < locb || hicb < p[3] {
485+
return false
486+
}
487+
p = p[4:]
488+
default:
489+
return false // illegal starter byte
478490
}
479-
i += size
480491
}
481492
// All of p was consumed without finding an invalid sequence.
return true
482493
}
483494

484495
// ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
485496
func ValidString(s string) bool {
486-
// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
487-
for len(s) >= 8 {
488-
// Combining two 32 bit loads allows the same code to be used
489-
// for 32 and 64 bit platforms.
490-
// The compiler can generate a 32bit load for first32 and second32
491-
// on many platforms. See test/codegen/memcombine.go.
492-
first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
493-
second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24
494-
if (first32|second32)&0x80808080 != 0 {
495-
// Found a non ASCII byte (>= RuneSelf).
496-
break
497-
}
498-
s = s[8:]
499-
}
500-
n := len(s)
501-
for i := 0; i < n; {
502-
si := s[i]
503-
if si < RuneSelf {
504-
i++
497+
// Consume s from the front: each iteration strips either a run of
// ASCII bytes or exactly one multi-byte sequence.
for len(s) > 0 {
498+
s0 := s[0]
499+
if s0 < RuneSelf {
500+
s = s[1:]
501+
// If there's one ASCII byte, there are probably more.
502+
// Advance quickly through ASCII-only data.
503+
// Note: using > instead of >= here is intentional. That avoids
504+
// needing pointing-past-the-end fixup on the slice operations.
505+
if len(s) > ptrSize && word(s)&hiBits == 0 {
506+
s = s[ptrSize:]
507+
// One ASCII-only word was skipped; probe two words at once next.
if len(s) > 2*ptrSize && (word(s)|word(s[ptrSize:]))&hiBits == 0 {
508+
s = s[2*ptrSize:]
509+
// Keep skipping four words per iteration while all of them are ASCII-only.
for len(s) > 4*ptrSize && ((word(s)|word(s[ptrSize:]))|(word(s[2*ptrSize:])|word(s[3*ptrSize:])))&hiBits == 0 {
510+
s = s[4*ptrSize:]
511+
}
512+
}
513+
}
505514
continue
506515
}
507-
x := first[si]
508-
if x == xx {
509-
return false // Illegal starter byte.
510-
}
516+
// s0 >= RuneSelf here. The low three bits of the table entry are used
// as the sequence length and the high nibble selects the accepted
// range for the second byte.
x := first[s0]
511517
size := int(x & 7)
512-
if i+size > n {
513-
return false // Short or invalid.
514-
}
515518
accept := acceptRanges[x>>4]
516-
if c := s[i+1]; c < accept.lo || accept.hi < c {
517-
return false
518-
} else if size == 2 {
519-
} else if c := s[i+2]; c < locb || hicb < c {
520-
return false
521-
} else if size == 3 {
522-
} else if c := s[i+3]; c < locb || hicb < c {
523-
return false
519+
// Each case checks the remaining length first, so the byte reads that
// follow in the same condition are only evaluated when in range.
switch size {
520+
case 2:
521+
if len(s) < 2 || s[1] < accept.lo || accept.hi < s[1] {
522+
return false
523+
}
524+
s = s[2:]
525+
case 3:
526+
if len(s) < 3 || s[1] < accept.lo || accept.hi < s[1] || s[2] < locb || hicb < s[2] {
527+
return false
528+
}
529+
s = s[3:]
530+
case 4:
531+
if len(s) < 4 || s[1] < accept.lo || accept.hi < s[1] || s[2] < locb || hicb < s[2] || s[3] < locb || hicb < s[3] {
532+
return false
533+
}
534+
s = s[4:]
535+
default:
536+
return false // illegal starter byte
524537
}
525-
i += size
526538
}
527539
// All of s was consumed without finding an invalid sequence.
return true
528540
}

src/unicode/utf8/utf8_test.go

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -489,6 +489,16 @@ var validTests = []ValidTest{
489489
{string("\xed\xbf\xbf"), false}, // U+DFFF low surrogate (sic)
490490
}
491491

492+
// init appends generated cases to validTests. For each i from 0 through 99
// it adds: i copies of "a" (expected valid); that run followed by "Ж"
// (expected valid) or by the single byte "\xe2" (expected invalid); and
// those last two again with i copies of "b" appended.
func init() {
493+
for i := range 100 {
494+
validTests = append(validTests, ValidTest{in: strings.Repeat("a", i), out: true})
495+
validTests = append(validTests, ValidTest{in: strings.Repeat("a", i) + "Ж", out: true})
496+
validTests = append(validTests, ValidTest{in: strings.Repeat("a", i) + "\xe2", out: false})
497+
validTests = append(validTests, ValidTest{in: strings.Repeat("a", i) + "Ж" + strings.Repeat("b", i), out: true})
498+
validTests = append(validTests, ValidTest{in: strings.Repeat("a", i) + "\xe2" + strings.Repeat("b", i), out: false})
499+
}
500+
}
501+
492502
func TestValid(t *testing.T) {
493503
for _, tt := range validTests {
494504
if Valid([]byte(tt.in)) != tt.out {

0 commit comments

Comments
 (0)