Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 44f26bf

Browse files
committed
Improve SortedSet ctor performance
- SortedSet's ctor has a loop that's O(D * N), where N is the number of elements being added to the set and D is the number of duplicates in those elements. As the number of duplicates D grows, the time it takes to construct a sorted set grows polynomially, approaching O(N^2), which is particularly bad given that the purpose of a set is to help remove duplicates. The core cause is a loop over the elements to determine whether element n is equal to n-1; if it is, element n is removed, but it's removed via a call to List<T>.RemoveAt, which will shift down all of the elements above it in the list. A better implementation can make this O(N) instead; the ctor overall is still then O(N log N), due to the sort, but that's better than O(N^2). - To sort the elements, the ctor builds a List<T> of the elements but then uses ToArray to get an array used to actually build the set; this incurs an unnecessary (and potentially large) array allocation and copy. We can avoid that entirely by just building up the array manually using the same logic that List<T> does. That same logic is actually duplicated in multiple places, including in Stack<T>, so I've extracted it as a helper to use in both places (there are a few other places in corefx that use the same logic, hence I put the helper into a Common file, but I did not condense those other uses as part of this commit). This then further helps with Stack<T> perf in a few corner cases, such as when a Stack<T> is constructed with an empty enumerable. (It's not clear to me why Queue<T> doesn't have the same IColection<T>-related logic; probably just an oversight, but I didn't want to change its behavior with the additional ICollection<T> check.)
1 parent 9aa6349 commit 44f26bf

File tree

4 files changed

+106
-38
lines changed

4 files changed

+106
-38
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
3+
4+
namespace System.Collections.Generic
5+
{
6+
/// <summary>Internal helper functions for working with enumerables.</summary>
7+
internal static class EnumerableHelpers
8+
{
9+
/// <summary>Converts an enumerable to an array using the same logic as does List{T}.</summary>
10+
/// <param name="length">The number of items stored in the resulting array, 0-indexed.</param>
11+
/// <returns>
12+
/// The resulting array. The length of the array may be greater than <paramref name="length"/>,
13+
/// which is the actual number of elements in the array.
14+
/// </returns>
15+
internal static T[] ToArray<T>(IEnumerable<T> source, out int length)
16+
{
17+
T[] arr;
18+
int count = 0;
19+
20+
ICollection<T> ic = source as ICollection<T>;
21+
if (ic != null)
22+
{
23+
count = ic.Count;
24+
if (count == 0)
25+
{
26+
arr = Array.Empty<T>();
27+
}
28+
else
29+
{
30+
// Allocate an array of the desired size, then copy the elements into it. Note that this has the same
31+
// issue regarding concurrency as other existing collections like List<T>. If the collection size
32+
// concurrently changes between the array allocation and the CopyTo, we could end up either getting an
33+
// exception from overrunning the array (if the size went up) or we could end up not filling as many
34+
// items as 'count' suggests (if the size went down). This is only an issue for concurrent collections
35+
// that implement ICollection<T>, which as of .NET 4.6 is just ConcurrentDictionary<TKey, TValue>.
36+
arr = new T[count];
37+
ic.CopyTo(arr, 0);
38+
}
39+
}
40+
else
41+
{
42+
arr = Array.Empty<T>();
43+
foreach (var item in source)
44+
{
45+
if (count == arr.Length)
46+
{
47+
// MaxArrayLength is defined in Array.MaxArrayLength and in gchelpers in CoreCLR.
48+
// It represents the maximum number of elements that can be in an array where
49+
// the size of the element is greater than one byte; a separate, slightly larger constant,
50+
// is used when the size of the element is one.
51+
const int MaxArrayLength = 0x7FEFFFFF;
52+
53+
// This is the same growth logic as in List<T>:
54+
// If the array is currently empty, we make it a default size. Otherwise, we attempt to
55+
// double the size of the array. Doubling will overflow once the size of the array reaches
56+
// 2^30, since doubling to 2^31 is 1 larger than Int32.MaxValue. In that case, we instead
57+
// constrain the length to be MaxArrayLength (this overflow check works because of of the
58+
// cast to uint). Because a slightly larger constant is used when T is one byte in size, we
59+
// could then end up in a situation where arr.Length is MaxArrayLength or slightly larger, such
60+
// that we constrain newLength to be MaxArrayLength but the needed number of elements is actually
61+
// larger than that. For that case, we then ensure that the newLength is large enough to hold
62+
// the desired capacity. This does mean that in the very rare case where we've grown to such a
63+
// large size, each new element added after MaxArrayLength will end up doing a resize.
64+
const int DefaultCapacity = 4;
65+
int newLength = count == 0 ? DefaultCapacity : count * 2;
66+
if ((uint)newLength > MaxArrayLength)
67+
{
68+
newLength = MaxArrayLength;
69+
}
70+
if (newLength < count + 1)
71+
{
72+
newLength = count + 1;
73+
}
74+
75+
arr = ArrayT<T>.Resize(arr, newLength, count);
76+
}
77+
arr[count++] = item;
78+
}
79+
}
80+
81+
length = count;
82+
return arr;
83+
}
84+
}
85+
}

src/System.Collections/src/System.Collections.csproj

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,14 @@
4242
<Compile Include="System\Collections\StructuralComparisons.cs" />
4343
</ItemGroup>
4444
<ItemGroup>
45+
<Compile Include="$(CommonPath)\System\ArrayT.cs">
46+
<Link>Common\System\ArrayT.cs</Link>
47+
</Compile>
4548
<Compile Include="$(CommonPath)\System\Collections\HashHelpers.cs">
4649
<Link>Common\System\Collections\HashHelpers.cs</Link>
4750
</Compile>
48-
<Compile Include="$(CommonPath)\System\ArrayT.cs">
49-
<Link>Common\System\ArrayT.cs</Link>
51+
<Compile Include="$(CommonPath)\System\Collections\Generic\EnumerableHelpers.cs">
52+
<Link>Common\System\Collections\Generic\EnumerableHelpers.cs</Link>
5053
</Compile>
5154
</ItemGroup>
5255
<ItemGroup>

src/System.Collections/src/System/Collections/Generic/SortedSet.cs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,9 @@ public SortedSet(IEnumerable<T> collection, IComparer<T> comparer)
9595
//breadth first traversal to recreate nodes
9696
if (baseSortedSet.Count == 0)
9797
{
98-
_count = 0;
99-
_version = 0;
100-
_root = null;
10198
return;
10299
}
103100

104-
105101
//pre order way to replicate nodes
106102
Stack<Node> theirStack = new Stack<SortedSet<T>.Node>(2 * log2(baseSortedSet.Count) + 2);
107103
Stack<Node> myStack = new Stack<SortedSet<T>.Node>(2 * log2(baseSortedSet.Count) + 2);
@@ -138,23 +134,27 @@ public SortedSet(IEnumerable<T> collection, IComparer<T> comparer)
138134
}
139135
}
140136
_count = baseSortedSet._count;
141-
_version = 0;
142137
}
143138
else
144-
{ //As it stands, you're doing an NlogN sort of the collection
145-
List<T> els = new List<T>(collection);
146-
els.Sort(_comparer);
147-
for (int i = 1; i < els.Count; i++)
139+
{
140+
int count;
141+
T[] els = EnumerableHelpers.ToArray(collection, out count);
142+
if (count > 0)
148143
{
149-
if (comparer.Compare(els[i], els[i - 1]) == 0)
144+
Array.Sort(els, 0, count, _comparer);
145+
int index = 1;
146+
for (int i = 1; i < count; i++)
150147
{
151-
els.RemoveAt(i);
152-
i--;
148+
if (comparer.Compare(els[i], els[i - 1]) != 0)
149+
{
150+
els[index++] = els[i];
151+
}
153152
}
153+
count = index;
154+
155+
_root = ConstructRootFromSortedArray(els, 0, count - 1, null);
156+
_count = count;
154157
}
155-
_root = ConstructRootFromSortedArray(els.ToArray(), 0, els.Count - 1, null);
156-
_count = els.Count;
157-
_version = 0;
158158
}
159159
}
160160

src/System.Collections/src/System/Collections/Generic/Stack.cs

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -60,27 +60,7 @@ public Stack(IEnumerable<T> collection)
6060
if (collection == null)
6161
throw new ArgumentNullException("collection");
6262

63-
ICollection<T> c = collection as ICollection<T>;
64-
if (c != null)
65-
{
66-
int count = c.Count;
67-
_array = new T[count];
68-
c.CopyTo(_array, 0);
69-
_size = count;
70-
}
71-
else
72-
{
73-
_size = 0;
74-
_array = new T[DefaultCapacity];
75-
76-
using (IEnumerator<T> en = collection.GetEnumerator())
77-
{
78-
while (en.MoveNext())
79-
{
80-
Push(en.Current);
81-
}
82-
}
83-
}
63+
_array = EnumerableHelpers.ToArray(collection, out _size);
8464
}
8565

8666
/// <include file='doc\Stack.uex' path='docs/doc[@for="Stack.Count"]/*' />

0 commit comments

Comments
 (0)