Skip to content

Commit 1187e69

Browse files
committed
Merged PR 722049: Revert file content table to use ConcurrentBigMap
- Revert the use of `ConcurrentDictionary` back to `ConcurrentBigMap` because based on recent benchmarking, `ConcurrentBigMap`, with the typical load of file content table, has better memory characteristics. - Avoid unnecessary existing check when "cloning" concurrent big map. This gives 1.47x speed up. | Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated | |------------|-------------|-----------|------------|-------------|-------------|------------| | CopyOld | 843.5 ms | 9.61 ms | 8.98 ms | 8000.0000 | 5000.0000 | 48.05 MB | | CopyNew | 573.4 ms | 11.40 ms | 11.19 ms | 8000.0000 | 5000.0000 | 48.05 MB | Related work items: #2072119
1 parent f658365 commit 1187e69

File tree

5 files changed

+79
-68
lines changed

5 files changed

+79
-68
lines changed

Public/Src/Utilities/Native/IO/FileIdAndVolumeId.cs

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,6 @@ namespace BuildXL.Native.IO
2424
{
2525
internal static readonly int Size = Marshal.SizeOf<FileIdAndVolumeId>();
2626

27-
/// <summary>
28-
/// Comparer instance.
29-
/// </summary>
30-
public static readonly Comparer ComparerInstance = new();
31-
3227
/// <summary>
3328
/// Volume containing the file.
3429
/// </summary>
@@ -92,17 +87,5 @@ public static FileIdAndVolumeId Deserialize(BinaryReader reader)
9287
{
9388
return new FileIdAndVolumeId(reader.ReadUInt64(), FileId.Deserialize(reader));
9489
}
95-
96-
/// <summary>
97-
/// Default comparer.
98-
/// </summary>
99-
public class Comparer : IEqualityComparer<FileIdAndVolumeId>
100-
{
101-
/// <inheritdoc/>
102-
public bool Equals(FileIdAndVolumeId x, FileIdAndVolumeId y) => x.Equals(y);
103-
104-
/// <inheritdoc/>
105-
public int GetHashCode([DisallowNull] FileIdAndVolumeId obj) => obj.GetHashCode();
106-
}
10790
}
10891
}

Public/Src/Utilities/Storage/FileContentTable.cs

Lines changed: 12 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ namespace BuildXL.Storage
6161
/// </remarks>
6262
public sealed class FileContentTable : IFileChangeTrackingObserver
6363
{
64-
private static readonly FileEnvelope s_fileEnvelope = new(name: "FileContentTable." + ContentHashingUtilities.HashInfo.Name, version: 20);
64+
private static readonly FileEnvelope s_fileEnvelope = new(name: "FileContentTable." + ContentHashingUtilities.HashInfo.Name, version: 21);
6565

6666
/// <summary>
6767
/// Default time-to-live (TTL) for new entries to a <see cref="FileContentTable"/>.
@@ -72,7 +72,7 @@ public sealed class FileContentTable : IFileChangeTrackingObserver
7272
/// <summary>
7373
/// These are the (global file ID) -> (USN, hash) mappings recorded or retrieved in this session.
7474
/// </summary>
75-
private readonly ConcurrentDictionary<FileIdAndVolumeId, Entry> m_entries = new(1024, 1024);
75+
private readonly ConcurrentBigMap<FileIdAndVolumeId, Entry> m_entries = new();
7676

7777
/// <summary>
7878
/// In the event a volume has a disabled change journal or not all files have USNs (journal enabled after last write to a
@@ -148,38 +148,12 @@ private FileContentTable(FileContentTable other, LoggingContext loggingContext,
148148
EntryTimeToLive = entryTimeToLive;
149149

150150
// Copy entries, discarding the stale ones and decrementing TTLs
151-
//
152-
// Previously, the m_entries' implementation uses ConcurrentBigMap, and it calls ConcurrentBigMap's ConvertUnsafe to cheaply
153-
// copy the entries:
154-
//
155-
// m_entries = other.m_entries.ConvertUnsafe(e => e.TimeToLive > 0
156-
// ? e.WithTimeToLive((ushort)Math.Min(entryTimeToLive, e.TimeToLive - 1))
157-
// : new Entry(e.Usn, default /* Invalid hash as a marker */, e.Length, e.TimeToLive));
158-
// m_entries
159-
// .Where(kvp => !kvp.Value.Hash.IsValid)
160-
// .Select(kvp => kvp.Key)
161-
// .ToList()
162-
// .ForEach(k => m_entries.RemoveKey(k));
163-
//
164-
// However, the enumeration implementation of ConcurrentBigMap seems to be buggy and causes
165-
// the following exception:
166-
// ---> System.IndexOutOfRangeException: Index was outside the bounds of the array.
167-
// at IEnumerator<TItem> BuildXL.Utilities.Collections.ConcurrentBigSet<TItem>+ItemsCollection.GetEnumerator() + MoveNext()
168-
// at List<TResult> System.Linq.Enumerable+WhereSelectEnumerableIterator<TSource, TResult>.ToList()
169-
// at List<TSource> System.Linq.Enumerable.ToList<TSource>(IEnumerable<TSource> source)
170-
// at new BuildXL.Storage.FileContentTable(FileContentTable other, LoggingContext loggingContext, ushort entryTimeToLive) in \.\Public\Src\Utilities\Storage\FileContentTable.cs:line 156
171-
m_entries = new ConcurrentDictionary<FileIdAndVolumeId, Entry>(
172-
1024,
173-
other.m_entries
174-
.Where(kvp => kvp.Value.TimeToLive > 0)
175-
.Select(kvp => new KeyValuePair<FileIdAndVolumeId, Entry>(
176-
kvp.Key,
177-
kvp.Value.WithTimeToLive((ushort)Math.Min(entryTimeToLive, kvp.Value.TimeToLive - 1)))),
178-
#if NET_FRAMEWORK
179-
FileIdAndVolumeId.ComparerInstance); // ConcurrentDictionary does not support null comparer on .NET Framework
180-
#else
181-
null);
182-
#endif
151+
m_entries = ConcurrentBigMap<FileIdAndVolumeId, Entry>.Create(items: other.m_entries
152+
.Where(kvp => kvp.Value.TimeToLive > 0)
153+
.Select(kvp => new KeyValuePair<FileIdAndVolumeId, Entry>(
154+
kvp.Key,
155+
kvp.Value.WithTimeToLive((ushort)Math.Min(entryTimeToLive, kvp.Value.TimeToLive - 1)))),
156+
checkExistingItem: false);
183157
}
184158

185159
/// <summary>
@@ -475,11 +449,12 @@ public VersionedFileIdentity RecordContentHash(
475449
// So, concurrently creating and recording (or even just recording) different links is possible, and
476450
// keeping the last stored entry (rather than highest-USN entry) can introduce false positives.
477451
var fileIdAndVolumeId = new FileIdAndVolumeId(strongIdentity.Value.VolumeSerialNumber, strongIdentity.Value.FileId);
478-
452+
479453
m_entries.AddOrUpdate(
480454
fileIdAndVolumeId,
455+
(@this: this, path, newEntry),
481456
addValueFactory: static (key, state) => state.newEntry,
482-
updateValueFactory: static (key, existingEntry, state) =>
457+
updateValueFactory: static (key, state, existingEntry) =>
483458
{
484459
FileContentTable @this = state.@this;
485460
Entry newEntry = state.newEntry;
@@ -510,8 +485,7 @@ public VersionedFileIdentity RecordContentHash(
510485
}
511486

512487
return newEntry;
513-
},
514-
factoryArgument: (@this: this, path, newEntry));
488+
});
515489

516490
if (ETWLogger.Log.IsEnabled(BuildXL.Tracing.Diagnostics.EventLevel.Verbose, Keywords.Diagnostics))
517491
{

Public/Src/Utilities/UnitTests/Collections/ConcurrentBigMapTests.cs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Licensed under the MIT License.
33

44
using System;
5+
using System.Collections.Generic;
56
using System.IO;
67
using System.Linq;
78
using System.Threading;
@@ -166,6 +167,45 @@ private static void TestOperationsHelper(bool parallel)
166167
}, parallel);
167168
}
168169

170+
[Fact]
171+
public void TestCopy()
172+
{
173+
const int Length = 10_000;
174+
var map = ConcurrentBigMap<int, int>.Create(items: Enumerable.Range(0, Length).Select(i => new KeyValuePair<int, int>(i, i)), checkExistingItem: true);
175+
var copy = ConcurrentBigMap<int, int>.Create(items: map, checkExistingItem: false);
176+
Enumerable.Range(0, Length).ToList().ForEach(i =>
177+
{
178+
XAssert.AreEqual(map.Count, copy.Count);
179+
XAssert.AreEqual(map.TryGetValue(i, out var mapValue), copy.TryGetValue(i, out var copyValue));
180+
XAssert.AreEqual(mapValue, copyValue);
181+
});
182+
}
183+
184+
[Fact]
185+
public void TestRemoveWhileEnumerateSingleThread()
186+
{
187+
const int Length = 10_000;
188+
var map = ConcurrentBigMap<int, int>.Create(items: Enumerable.Range(0, Length).Select(i => new KeyValuePair<int, int>(i, i)), checkExistingItem: true);
189+
var copy = ConcurrentBigMap<int, int>.Create(items: map, checkExistingItem: false);
190+
191+
map.TryRemove(77, out _);
192+
copy.TryRemove(77, out _);
193+
copy.TryAdd(77, 12345);
194+
195+
foreach (var kvp in copy.Where(kvp => kvp.Key % 2 == 1))
196+
{
197+
copy.TryRemove(kvp.Key, out _);
198+
}
199+
200+
Enumerable.Range(0, Length).ToList().ForEach(i =>
201+
{
202+
if (i % 2 == 1)
203+
{
204+
XAssert.IsFalse(copy.ContainsKey(i));
205+
}
206+
});
207+
}
208+
169209
private static void For(int count, Action<int> action, bool parallel)
170210
{
171211
if (parallel)

Public/Src/Utilities/Utilities.Core/Collections/ConcurrentBigMap.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,20 @@ public ConcurrentBigMap(
7676
/// <param name="items">an enumeration of unique (!) items to insert</param>
7777
/// <param name="keyComparer">the comparer for keys</param>
7878
/// <param name="valueComparer">the comparer for values used in compare exchange operations</param>
79+
/// <param name="checkExistingItem">if true, checks if the item already exists in the backing set</param>
7980
public static ConcurrentBigMap<TKey, TValue> Create(
8081
int concurrencyLevel = ConcurrentBigSet<KeyValuePair<TKey, TValue>>.DefaultConcurrencyLevel,
8182
int capacity = ConcurrentBigSet<KeyValuePair<TKey, TValue>>.DefaultCapacity,
8283
int ratio = ConcurrentBigSet<KeyValuePair<TKey, TValue>>.DefaultBucketToItemsRatio,
8384
IEnumerable<KeyValuePair<TKey, TValue>> items = null,
84-
IEqualityComparer<TKey> keyComparer = null, IEqualityComparer<TValue> valueComparer = null)
85+
IEqualityComparer<TKey> keyComparer = null,
86+
IEqualityComparer<TValue> valueComparer = null,
87+
bool checkExistingItem = true)
8588
{
8689
var result = new ConcurrentBigMap<TKey, TValue>(concurrencyLevel: concurrencyLevel, capacity: capacity, ratio: ratio, keyComparer: keyComparer, valueComparer: valueComparer);
8790
if (items != null)
8891
{
89-
result.BackingSet.UnsafeAddItems(items.Select(item => result.CreateKeyValuePendingItem(item.Key, item.Value)));
92+
result.BackingSet.UnsafeAddItems(items.Select(item => result.CreateKeyValuePendingItem(item.Key, item.Value)), checkExistingItem: checkExistingItem);
9093
}
9194

9295
return result;

Public/Src/Utilities/Utilities.Core/Collections/ConcurrentBigSet.cs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,7 @@ public GetAddOrUpdateResult Remove(TItem item, IEqualityComparer<TItem> comparer
538538
/// <summary>
539539
/// This method assumes no concurrent accesses, and that the new item isn't already in the list
540540
/// </summary>
541-
internal void UnsafeAddItems<TPendingItem>(IEnumerable<TPendingItem> pendingItems)
541+
internal void UnsafeAddItems<TPendingItem>(IEnumerable<TPendingItem> pendingItems, bool checkExistingItem)
542542
where TPendingItem : IPendingSetItem<TItem>
543543
{
544544
Accessors accessors = m_accessors;
@@ -548,19 +548,30 @@ internal void UnsafeAddItems<TPendingItem>(IEnumerable<TPendingItem> pendingItem
548548

549549
int lockNo = 0;
550550

551+
int bucketNo;
552+
int headNodeIndex;
553+
551554
// Number of nodes searched to find the item or the total number of nodes if the item is not found
552-
var result = FindItem(
553-
pendingItem,
554-
bucketHashCode,
555-
out int bucketNo,
556-
out int headNodeIndex,
557-
ref accessors,
558-
out int findCount,
559-
out int priorNodeIndex);
560-
Contract.Assert(!result.IsFound);
555+
if (checkExistingItem)
556+
{
557+
var result = FindItem(
558+
pendingItem,
559+
bucketHashCode,
560+
out bucketNo,
561+
out headNodeIndex,
562+
ref accessors,
563+
out int findCount,
564+
out int priorNodeIndex);
565+
Contract.Assert(!result.IsFound);
566+
}
567+
else
568+
{
569+
bucketNo = m_buckets.GetBucketNo(bucketHashCode);
570+
headNodeIndex = m_buckets[bucketNo];
571+
}
561572

562573
// now make an item from the lookup value
563-
TItem item = pendingItem.CreateOrUpdateItem(result.Item, false, out bool remove);
574+
TItem item = pendingItem.CreateOrUpdateItem(default, false, out bool remove);
564575
Contract.Assert(!remove, "Remove is only allowed when performing update operation");
565576
SetBucketHeadNode(bucketNo, lockNo, bucketHashCode, headNodeIndex, item, ref accessors);
566577
int countAfterAdd = Interlocked.Increment(ref m_count);

0 commit comments

Comments
 (0)