Skip to content

Commit 89d41d2

Browse files
jub0bs authored and gopherbot committed
bytes, strings: speed up TrimSpace
This change lifts bounds checks out of loops in the TrimSpace functions,
among other micro-optimizations. Here are some benchmark results
(no change to allocations):

goos: darwin
goarch: amd64
pkg: bytes
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                          │     old     │                 new                 │
                          │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8          4.406n ± 0%   3.829n ± 1%  -13.11% (p=0.000 n=20)
TrimSpace/ASCII-8           7.688n ± 1%   5.872n ± 1%  -23.61% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8    82.25n ± 1%   81.00n ± 1%   -1.51% (p=0.001 n=20)
TrimSpace/JustNonASCII-8    131.6n ± 8%   132.2n ± 1%        ~ (p=0.899 n=20)
geomean                     24.61n        22.15n        -9.99%

pkg: strings
                          │     old     │                 new                 │
                          │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8          4.178n ± 0%   3.857n ± 2%   -7.68% (p=0.001 n=20)
TrimSpace/ASCII-8           7.708n ± 0%   5.585n ± 1%  -27.55% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8    98.70n ± 1%   88.54n ± 1%  -10.30% (p=0.000 n=20)
TrimSpace/JustNonASCII-8    132.8n ± 2%   123.2n ± 0%   -7.16% (p=0.000 n=20)
geomean                     25.49n        22.02n       -13.61%

Change-Id: I523f03a909c82a51940b44c7b2634985b7447982
GitHub-Last-Rev: 35163f0
GitHub-Pull-Request: #75127
Reviewed-on: https://go-review.googlesource.com/c/go/+/698735
Reviewed-by: Sean Liao <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Auto-Submit: Sean Liao <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent 38204e0 commit 89d41d2

File tree

2 files changed

+42
-54
lines changed

2 files changed

+42
-54
lines changed

src/bytes/bytes.go

Lines changed: 22 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,41 +1117,34 @@ func trimRightUnicode(s []byte, cutset string) []byte {
11171117
// TrimSpace returns a subslice of s by slicing off all leading and
11181118
// trailing white space, as defined by Unicode.
11191119
func TrimSpace(s []byte) []byte {
1120-
// Fast path for ASCII: look for the first ASCII non-space byte
1121-
start := 0
1122-
for ; start < len(s); start++ {
1123-
c := s[start]
1120+
// Fast path for ASCII: look for the first ASCII non-space byte.
1121+
for lo, c := range s {
11241122
if c >= utf8.RuneSelf {
11251123
// If we run into a non-ASCII byte, fall back to the
1126-
// slower unicode-aware method on the remaining bytes
1127-
return TrimFunc(s[start:], unicode.IsSpace)
1128-
}
1129-
if asciiSpace[c] == 0 {
1130-
break
1124+
// slower unicode-aware method on the remaining bytes.
1125+
return TrimFunc(s[lo:], unicode.IsSpace)
11311126
}
1132-
}
1133-
1134-
// Now look for the first ASCII non-space byte from the end
1135-
stop := len(s)
1136-
for ; stop > start; stop-- {
1137-
c := s[stop-1]
1138-
if c >= utf8.RuneSelf {
1139-
return TrimFunc(s[start:stop], unicode.IsSpace)
1127+
if asciiSpace[c] != 0 {
1128+
continue
11401129
}
1141-
if asciiSpace[c] == 0 {
1142-
break
1130+
s = s[lo:]
1131+
// Now look for the first ASCII non-space byte from the end.
1132+
for hi := len(s) - 1; hi >= 0; hi-- {
1133+
c := s[hi]
1134+
if c >= utf8.RuneSelf {
1135+
return TrimFunc(s[:hi+1], unicode.IsSpace)
1136+
}
1137+
if asciiSpace[c] == 0 {
1138+
// At this point, s[:hi+1] starts and ends with ASCII
1139+
// non-space bytes, so we're done. Non-ASCII cases have
1140+
// already been handled above.
1141+
return s[:hi+1]
1142+
}
11431143
}
11441144
}
1145-
1146-
// At this point s[start:stop] starts and ends with an ASCII
1147-
// non-space bytes, so we're done. Non-ASCII cases have already
1148-
// been handled above.
1149-
if start == stop {
1150-
// Special case to preserve previous TrimLeftFunc behavior,
1151-
// returning nil instead of empty slice if all spaces.
1152-
return nil
1153-
}
1154-
return s[start:stop]
1145+
// Special case to preserve previous TrimLeftFunc behavior,
1146+
// returning nil instead of empty slice if all spaces.
1147+
return nil
11551148
}
11561149

11571150
// Runes interprets s as a sequence of UTF-8-encoded code points.

src/strings/strings.go

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,37 +1091,32 @@ func trimRightUnicode(s, cutset string) string {
10911091
// TrimSpace returns a slice of the string s, with all leading
10921092
// and trailing white space removed, as defined by Unicode.
10931093
func TrimSpace(s string) string {
1094-
// Fast path for ASCII: look for the first ASCII non-space byte
1095-
start := 0
1096-
for ; start < len(s); start++ {
1097-
c := s[start]
1094+
// Fast path for ASCII: look for the first ASCII non-space byte.
1095+
for lo, c := range []byte(s) {
10981096
if c >= utf8.RuneSelf {
10991097
// If we run into a non-ASCII byte, fall back to the
1100-
// slower unicode-aware method on the remaining bytes
1101-
return TrimFunc(s[start:], unicode.IsSpace)
1102-
}
1103-
if asciiSpace[c] == 0 {
1104-
break
1098+
// slower unicode-aware method on the remaining bytes.
1099+
return TrimFunc(s[lo:], unicode.IsSpace)
11051100
}
1106-
}
1107-
1108-
// Now look for the first ASCII non-space byte from the end
1109-
stop := len(s)
1110-
for ; stop > start; stop-- {
1111-
c := s[stop-1]
1112-
if c >= utf8.RuneSelf {
1113-
// start has been already trimmed above, should trim end only
1114-
return TrimRightFunc(s[start:stop], unicode.IsSpace)
1101+
if asciiSpace[c] != 0 {
1102+
continue
11151103
}
1116-
if asciiSpace[c] == 0 {
1117-
break
1104+
s = s[lo:]
1105+
// Now look for the first ASCII non-space byte from the end.
1106+
for hi := len(s) - 1; hi >= 0; hi-- {
1107+
c := s[hi]
1108+
if c >= utf8.RuneSelf {
1109+
return TrimRightFunc(s[:hi+1], unicode.IsSpace)
1110+
}
1111+
if asciiSpace[c] == 0 {
1112+
// At this point, s[:hi+1] starts and ends with ASCII
1113+
// non-space bytes, so we're done. Non-ASCII cases have
1114+
// already been handled above.
1115+
return s[:hi+1]
1116+
}
11181117
}
11191118
}
1120-
1121-
// At this point s[start:stop] starts and ends with an ASCII
1122-
// non-space bytes, so we're done. Non-ASCII cases have already
1123-
// been handled above.
1124-
return s[start:stop]
1119+
return ""
11251120
}
11261121

11271122
// TrimPrefix returns s without the provided leading prefix string.

0 commit comments

Comments
 (0)