Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 527cda0

Browse files
committed
Merge pull request #1816 from stephentoub/string_indexof_perf
Improve string.{Last}IndexOf perf on Unix for Ordinal/OrdinalIgnoreCase
2 parents c2f0c1f + 91f0f8b commit 527cda0

File tree

3 files changed

+127
-29
lines changed

3 files changed

+127
-29
lines changed

src/corefx/System.Globalization.Native/collation.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,79 @@ extern "C" int32_t LastIndexOf(
125125
return result;
126126
}
127127

128+
/*
129+
Static Function:
130+
AreEqualOrdinalIgnoreCase
131+
*/
132+
static bool AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two)
133+
{
134+
// Return whether the two characters are identical or would be identical if they were upper-cased.
135+
136+
if (one == two)
137+
{
138+
return true;
139+
}
140+
141+
if (one == 0x0131 || two == 0x0131)
142+
{
143+
// On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
144+
// capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
145+
// We special case it to match the Windows invariant behavior.
146+
return false;
147+
}
148+
149+
return u_toupper(one) == u_toupper(two);
150+
}
151+
152+
/*
153+
Function:
154+
IndexOfOrdinalIgnoreCase
155+
*/
156+
extern "C" int32_t
157+
IndexOfOrdinalIgnoreCase(
158+
const UChar* lpTarget, int32_t cwTargetLength,
159+
const UChar* lpSource, int32_t cwSourceLength,
160+
int32_t findLast)
161+
{
162+
int32_t result = -1;
163+
164+
int32_t endIndex = cwSourceLength - cwTargetLength;
165+
assert(endIndex >= 0);
166+
167+
int32_t i = 0;
168+
while (i <= endIndex)
169+
{
170+
int32_t srcIdx = i, trgIdx = 0;
171+
const UChar *src = lpSource, *trg = lpTarget;
172+
UChar32 srcCodepoint, trgCodepoint;
173+
174+
bool match = true;
175+
while (trgIdx < cwTargetLength)
176+
{
177+
U16_NEXT(src, srcIdx, cwSourceLength, srcCodepoint);
178+
U16_NEXT(trg, trgIdx, cwTargetLength, trgCodepoint);
179+
if (!AreEqualOrdinalIgnoreCase(srcCodepoint, trgCodepoint))
180+
{
181+
match = false;
182+
break;
183+
}
184+
}
185+
186+
if (match)
187+
{
188+
result = i;
189+
if (!findLast)
190+
{
191+
break;
192+
}
193+
}
194+
195+
U16_FWD_1(lpSource, i, cwSourceLength);
196+
}
197+
198+
return result;
199+
}
200+
128201
/*
129202
Return value is a "Win32 BOOL" (1 = true, 0 = false)
130203
*/

src/mscorlib/corefx/Interop/Unix/System.Globalization.Native/Interop.Collation.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ internal static partial class GlobalizationInterop
1818
[DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
1919
internal unsafe static extern int LastIndexOf(byte[] localeName, string target, char* pSource, int cwSourceLength, CompareOptions options);
2020

21+
[DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
22+
internal unsafe static extern int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast);
23+
2124
[DllImport(Libraries.GlobalizationInterop, CharSet = CharSet.Unicode)]
2225
[return: MarshalAs(UnmanagedType.Bool)]
2326
internal unsafe static extern bool StartsWith(byte[] localeName, string target, string source, int cwSourceLength, CompareOptions options);

src/mscorlib/corefx/System/Globalization/CompareInfo.Unix.cs

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ internal unsafe CompareInfo(CultureInfo culture)
2020
m_sortNameAsUtf8 = System.Text.Encoding.UTF8.GetBytes(m_sortName);
2121
}
2222

23-
internal static int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
23+
internal static unsafe int IndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
2424
{
2525
Contract.Assert(source != null);
2626
Contract.Assert(value != null);
@@ -30,33 +30,41 @@ internal static int IndexOfOrdinal(string source, string value, int startIndex,
3030
return startIndex;
3131
}
3232

33-
// TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
33+
if (count < value.Length)
34+
{
35+
return -1;
36+
}
3437

3538
if (ignoreCase)
3639
{
37-
source = source.ToUpper(CultureInfo.InvariantCulture);
38-
value = value.ToUpper(CultureInfo.InvariantCulture);
40+
fixed (char* pSource = source)
41+
{
42+
int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false);
43+
return index != -1 ?
44+
startIndex + index :
45+
-1;
46+
}
3947
}
4048

41-
source = source.Substring(startIndex, count);
42-
43-
for (int i = 0; i + value.Length <= source.Length; i++)
49+
int endIndex = startIndex + (count - value.Length);
50+
for (int i = startIndex; i <= endIndex; i++)
4451
{
45-
for (int j = 0; j < value.Length; j++) {
46-
if (source[i + j] != value[j]) {
47-
break;
48-
}
52+
int valueIndex, sourceIndex;
53+
54+
for (valueIndex = 0, sourceIndex = i;
55+
valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
56+
valueIndex++, sourceIndex++) ;
4957

50-
if (j == value.Length - 1) {
51-
return i + startIndex;
52-
}
58+
if (valueIndex == value.Length)
59+
{
60+
return i;
5361
}
5462
}
5563

5664
return -1;
5765
}
5866

59-
internal static int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
67+
internal static unsafe int LastIndexOfOrdinal(string source, string value, int startIndex, int count, bool ignoreCase)
6068
{
6169
Contract.Assert(source != null);
6270
Contract.Assert(value != null);
@@ -66,27 +74,41 @@ internal static int LastIndexOfOrdinal(string source, string value, int startInd
6674
return startIndex;
6775
}
6876

69-
// TODO (dotnet/corefx#3468): Move this into the shim so we don't have to do the ToUpper or call substring.
77+
if (count < value.Length)
78+
{
79+
return -1;
80+
}
81+
82+
// startIndex is the index into source where we start search backwards from.
83+
// leftStartIndex is the index into source of the start of the string that is
84+
// count characters away from startIndex.
85+
int leftStartIndex = startIndex - count + 1;
7086

7187
if (ignoreCase)
7288
{
73-
source = source.ToUpper(CultureInfo.InvariantCulture);
74-
value = value.ToUpper(CultureInfo.InvariantCulture);
89+
fixed (char* pSource = source)
90+
{
91+
int lastIndex = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count, findLast: true);
92+
return lastIndex != -1 ?
93+
leftStartIndex + lastIndex :
94+
-1;
95+
}
7596
}
7697

77-
source = source.Substring(startIndex - count + 1, count);
98+
for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--)
99+
{
100+
int valueIndex, sourceIndex;
78101

79-
int last = -1;
102+
for (valueIndex = 0, sourceIndex = i;
103+
valueIndex < value.Length && source[sourceIndex] == value[valueIndex];
104+
valueIndex++, sourceIndex++) ;
80105

81-
int cur = 0;
82-
while ((cur = IndexOfOrdinal(source, value, last + 1, source.Length - last - 1, false)) != -1)
83-
{
84-
last = cur;
106+
if (valueIndex == value.Length) {
107+
return i;
108+
}
85109
}
86110

87-
return last >= 0 ?
88-
last + startIndex - count + 1 :
89-
-1;
111+
return -1;
90112
}
91113

92114
private unsafe int GetHashCodeOfStringCore(string source, CompareOptions options)
@@ -138,9 +160,9 @@ private unsafe int IndexOfCore(string source, string target, int startIndex, int
138160

139161
fixed (char* pSource = source)
140162
{
141-
int lastIndex = Interop.GlobalizationInterop.IndexOf(m_sortNameAsUtf8, target, pSource + startIndex, count, options);
163+
int index = Interop.GlobalizationInterop.IndexOf(m_sortNameAsUtf8, target, pSource + startIndex, count, options);
142164

143-
return lastIndex != -1 ? lastIndex + startIndex : -1;
165+
return index != -1 ? index + startIndex : -1;
144166
}
145167
}
146168

0 commit comments

Comments
 (0)