Skip to content

Commit 4efe8f5

Browse files
authored
Optimize document and media seeding by looking up from database in batches (#19890)
* Optimize document and media seeding by looking up from database in batches. * Ensure null values aren't stored in the cache when checking existence. * Fixed failing integration tests. * Resolved issue with not writing to the L1 cache on an L2 hit. * Tidied up and populated XML header comments. * Address issue raised in code review.
1 parent 417576b commit 4efe8f5

File tree

8 files changed

+503
-114
lines changed

8 files changed

+503
-114
lines changed
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
using Microsoft.Extensions.Caching.Hybrid;
2+
3+
namespace Umbraco.Cms.Infrastructure.HybridCache.Extensions;
4+
5+
/// <summary>
/// Extension methods for probing a <see cref="Microsoft.Extensions.Caching.Hybrid.HybridCache"/> for an entry
/// without supplying a real value factory.
/// </summary>
internal static class HybridCacheExtensions
{
    /// <summary>
    /// Determines whether the cache holds an entry for the given key.
    /// </summary>
    /// <param name="cache">The <see cref="Microsoft.Extensions.Caching.Hybrid.HybridCache"/> to query.</param>
    /// <param name="key">The key of the entry to look for.</param>
    /// <returns>True when an entry was found, otherwise false.</returns>
    /// <remarks>
    /// Delegates to <see cref="TryGetValueAsync{T}"/> with <see cref="object"/> as the value type and discards the value.
    /// Approach taken from https://github.com/dotnet/aspnetcore/discussions/57191
    /// </remarks>
    public static async Task<bool> ExistsAsync(this Microsoft.Extensions.Caching.Hybrid.HybridCache cache, string key)
        => (await TryGetValueAsync<object>(cache, key)).Exists;

    /// <summary>
    /// Looks up the entry for the given key and reports whether it was found, together with its value.
    /// </summary>
    /// <typeparam name="T">The type of the cached value.</typeparam>
    /// <param name="cache">The <see cref="Microsoft.Extensions.Caching.Hybrid.HybridCache"/> to query.</param>
    /// <param name="key">The key of the entry to look for.</param>
    /// <returns>
    /// A tuple whose first element is true when the entry was found, and whose second element is the value
    /// returned by the cache (the default of <typeparamref name="T"/> when nothing was found).
    /// </returns>
    /// <remarks>
    /// The lookup goes through <c>GetOrCreateAsync</c> with a factory that only records that it was invoked and
    /// yields a default value. When the factory ran, the key is removed again afterwards so that the placeholder
    /// does not stay in the cache.
    /// Approach taken from https://github.com/dotnet/aspnetcore/discussions/57191
    /// </remarks>
    public static async Task<(bool Exists, T? Value)> TryGetValueAsync<T>(this Microsoft.Extensions.Caching.Hybrid.HybridCache cache, string key)
    {
        var factoryInvoked = false;

        // Only reached when the cache could not supply a value for the key.
        ValueTask<T> RecordMiss(object state, CancellationToken token)
        {
            factoryInvoked = true;
            return new ValueTask<T>(default(T)!);
        }

        T? value = await cache.GetOrCreateAsync<object, T>(
            key,
            null!,
            RecordMiss,
            new HybridCacheEntryOptions(),
            null,
            CancellationToken.None);

        // A miss means the call above created an entry holding the default value: take it out again.
        if (factoryInvoked)
        {
            await cache.RemoveAsync(key);
        }

        return (factoryInvoked is false, value);
    }
}

src/Umbraco.PublishedCache.HybridCache/Persistence/DatabaseCacheRepository.cs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,26 @@ public void Rebuild(
146146
return CreateContentNodeKit(dto, serializer, preview);
147147
}
148148

149+
/// <summary>
/// Gets the cache nodes for a collection of document keys using a single database query.
/// </summary>
/// <param name="keys">The keys of the documents to load.</param>
/// <param name="preview">
/// When false, rows without any published data are skipped. The flag is also passed on when the
/// cache nodes are created.
/// </param>
/// <returns>
/// The cache nodes created from the matching rows, already materialized so that enumerating the result
/// more than once does not repeat the node creation. Empty when <paramref name="keys"/> is empty.
/// </returns>
public async Task<IEnumerable<ContentCacheNode>> GetContentSourcesAsync(IEnumerable<Guid> keys, bool preview = false)
{
    // Materialize the keys once: they feed the IN clause, and an empty set needs no query at all.
    Guid[] keyArray = keys.ToArray();
    if (keyArray.Length == 0)
    {
        return Enumerable.Empty<ContentCacheNode>();
    }

    Sql<ISqlContext>? sql = SqlContentSourcesSelect()
        .Append(SqlObjectTypeNotTrashed(SqlContext, Constants.ObjectTypes.Document))
        .WhereIn<NodeDto>(x => x.UniqueId, keyArray)
        .Append(SqlOrderByLevelIdSortOrder(SqlContext));

    List<ContentSourceDto> dtos = await Database.FetchAsync<ContentSourceDto>(sql);

    IContentCacheDataSerializer serializer =
        _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Document);

    // ToList() is deliberate: returning the lazy Select would re-run CreateContentNodeKit
    // every time a caller enumerates (or counts) the result.
    return dtos
        .Where(x => x is not null)
        .Where(x => preview || x.PubDataRaw is not null || x.PubData is not null)
        .Select(x => CreateContentNodeKit(x, serializer, preview))
        .ToList();
}
168+
149169
private IEnumerable<ContentSourceDto> GetContentSourceByDocumentTypeKey(IEnumerable<Guid> documentTypeKeys, Guid objectType)
150170
{
151171
Guid[] keys = documentTypeKeys.ToArray();
@@ -220,6 +240,25 @@ public IEnumerable<Guid> GetDocumentKeysByContentTypeKeys(IEnumerable<Guid> keys
220240
return CreateMediaNodeKit(dto, serializer);
221241
}
222242

243+
/// <summary>
/// Gets the cache nodes for a collection of media keys using a single database query.
/// </summary>
/// <param name="keys">The keys of the media items to load.</param>
/// <returns>
/// The cache nodes created from the matching rows, already materialized so that enumerating the result
/// more than once does not repeat the node creation. Empty when <paramref name="keys"/> is empty.
/// </returns>
public async Task<IEnumerable<ContentCacheNode>> GetMediaSourcesAsync(IEnumerable<Guid> keys)
{
    // Materialize the keys once: they feed the IN clause, and an empty set needs no query at all.
    Guid[] keyArray = keys.ToArray();
    if (keyArray.Length == 0)
    {
        return Enumerable.Empty<ContentCacheNode>();
    }

    Sql<ISqlContext>? sql = SqlMediaSourcesSelect()
        .Append(SqlObjectTypeNotTrashed(SqlContext, Constants.ObjectTypes.Media))
        .WhereIn<NodeDto>(x => x.UniqueId, keyArray)
        .Append(SqlOrderByLevelIdSortOrder(SqlContext));

    List<ContentSourceDto> dtos = await Database.FetchAsync<ContentSourceDto>(sql);

    IContentCacheDataSerializer serializer =
        _contentCacheDataSerializerFactory.Create(ContentCacheDataSerializerEntityType.Media);

    // ToList() is deliberate: returning the lazy Select would re-run CreateMediaNodeKit
    // every time a caller enumerates (or counts) the result.
    return dtos
        .Where(x => x is not null)
        .Select(x => CreateMediaNodeKit(x, serializer))
        .ToList();
}
261+
223262
private async Task OnRepositoryRefreshed(IContentCacheDataSerializer serializer, ContentCacheNode content, bool preview)
224263
{
225264
// use a custom SQL to update row version on each update

src/Umbraco.PublishedCache.HybridCache/Persistence/IDatabaseCacheRepository.cs

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,48 +5,96 @@ namespace Umbraco.Cms.Infrastructure.HybridCache.Persistence;
55

66
internal interface IDatabaseCacheRepository
77
{
8+
/// <summary>
9+
/// Deletes the specified content item from the cache database.
10+
/// </summary>
811
Task DeleteContentItemAsync(int id);
912

13+
/// <summary>
14+
/// Gets a single cache node for a document key.
15+
/// </summary>
1016
Task<ContentCacheNode?> GetContentSourceAsync(Guid key, bool preview = false);
1117

18+
/// <summary>
/// Gets the cache nodes for a collection of document keys.
/// </summary>
/// <param name="keys">The keys of the documents to load.</param>
/// <param name="preview">Passed on unchanged to <see cref="GetContentSourceAsync"/> for every key.</param>
/// <returns>The cache nodes that were found; keys without a cache node are left out.</returns>
/// <remarks>
/// This default implementation loads the nodes one key at a time via <see cref="GetContentSourceAsync"/>.
/// </remarks>
// TODO (V18): Remove the default implementation on this method.
async Task<IEnumerable<ContentCacheNode>> GetContentSourcesAsync(IEnumerable<Guid> keys, bool preview = false)
{
    var found = new List<ContentCacheNode>();
    foreach (Guid key in keys)
    {
        // Keep only the keys that resolve to a node.
        if (await GetContentSourceAsync(key, preview) is { } node)
        {
            found.Add(node);
        }
    }

    return found;
}
36+
37+
/// <summary>
38+
/// Gets a single cache node for a media key.
39+
/// </summary>
1240
Task<ContentCacheNode?> GetMediaSourceAsync(Guid key);
1341

42+
/// <summary>
/// Gets the cache nodes for a collection of media keys.
/// </summary>
/// <param name="keys">The keys of the media items to load.</param>
/// <returns>The cache nodes that were found; keys without a cache node are left out.</returns>
/// <remarks>
/// This default implementation loads the nodes one key at a time via <see cref="GetMediaSourceAsync"/>.
/// </remarks>
// TODO (V18): Remove the default implementation on this method.
async Task<IEnumerable<ContentCacheNode>> GetMediaSourcesAsync(IEnumerable<Guid> keys)
{
    var found = new List<ContentCacheNode>();
    foreach (Guid key in keys)
    {
        // Keep only the keys that resolve to a node.
        if (await GetMediaSourceAsync(key) is { } node)
        {
            found.Add(node);
        }
    }

    return found;
}
60+
61+
/// <summary>
62+
/// Gets a collection of cache nodes for a collection of content type keys and entity type.
63+
/// </summary>
1564
IEnumerable<ContentCacheNode> GetContentByContentTypeKey(IEnumerable<Guid> keys, ContentCacheDataSerializerEntityType entityType);
1665

1766
/// <summary>
18-
/// Gets all content keys of specific document types
67+
/// Gets all content keys of specific document types.
1968
/// </summary>
2069
/// <param name="keys">The document types to find content using.</param>
70+
/// <param name="published">A flag indicating whether to restrict to just published content.</param>
2171
/// <returns>The keys of all content that uses the specified document types.</returns>
2272
IEnumerable<Guid> GetDocumentKeysByContentTypeKeys(IEnumerable<Guid> keys, bool published = false);
2373

2474
/// <summary>
25-
/// Refreshes the nucache database row for the given cache node />
75+
/// Refreshes the cache for the given document cache node.
2676
/// </summary>
27-
/// <returns><placeholder>A <see cref="Task"/> representing the asynchronous operation.</placeholder></returns>
2877
Task RefreshContentAsync(ContentCacheNode contentCacheNode, PublishedState publishedState);
2978

3079
/// <summary>
31-
/// Refreshes the nucache database row for the given cache node />
80+
/// Refreshes the cache row for the given media cache node.
3281
/// </summary>
33-
/// <returns><placeholder>A <see cref="Task"/> representing the asynchronous operation.</placeholder></returns>
3482
Task RefreshMediaAsync(ContentCacheNode contentCacheNode);
3583

3684
/// <summary>
37-
/// Rebuilds the caches for content, media and/or members based on the content type ids specified
85+
/// Rebuilds the caches for content, media and/or members based on the content type ids specified.
3886
/// </summary>
3987
/// <param name="contentTypeIds">
4088
/// If not null will process content for the matching content types, if empty will process all
41-
/// content
89+
/// content.
4290
/// </param>
4391
/// <param name="mediaTypeIds">
4492
/// If not null will process content for the matching media types, if empty will process all
45-
/// media
93+
/// media.
4694
/// </param>
4795
/// <param name="memberTypeIds">
4896
/// If not null will process content for the matching members types, if empty will process all
49-
/// members
97+
/// members.
5098
/// </param>
5199
void Rebuild(
52100
IReadOnlyCollection<int>? contentTypeIds = null,

src/Umbraco.PublishedCache.HybridCache/Services/DocumentCacheService.cs

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
#if DEBUG
2+
using System.Diagnostics;
3+
#endif
14
using Microsoft.Extensions.Caching.Hybrid;
5+
using Microsoft.Extensions.Logging;
26
using Microsoft.Extensions.Options;
37
using Umbraco.Cms.Core;
48
using Umbraco.Cms.Core.Models;
@@ -7,6 +11,7 @@
711
using Umbraco.Cms.Core.Scoping;
812
using Umbraco.Cms.Core.Services;
913
using Umbraco.Cms.Core.Services.Navigation;
14+
using Umbraco.Cms.Infrastructure.HybridCache.Extensions;
1015
using Umbraco.Cms.Infrastructure.HybridCache.Factories;
1116
using Umbraco.Cms.Infrastructure.HybridCache.Persistence;
1217
using Umbraco.Cms.Infrastructure.HybridCache.Serialization;
@@ -26,8 +31,8 @@ internal sealed class DocumentCacheService : IDocumentCacheService
2631
private readonly IPublishedModelFactory _publishedModelFactory;
2732
private readonly IPreviewService _previewService;
2833
private readonly IPublishStatusQueryService _publishStatusQueryService;
29-
private readonly IDocumentNavigationQueryService _documentNavigationQueryService;
3034
private readonly CacheSettings _cacheSettings;
35+
private readonly ILogger<DocumentCacheService> _logger;
3136
private HashSet<Guid>? _seedKeys;
3237

3338
private HashSet<Guid> SeedKeys
@@ -62,7 +67,7 @@ public DocumentCacheService(
6267
IPublishedModelFactory publishedModelFactory,
6368
IPreviewService previewService,
6469
IPublishStatusQueryService publishStatusQueryService,
65-
IDocumentNavigationQueryService documentNavigationQueryService)
70+
ILogger<DocumentCacheService> logger)
6671
{
6772
_databaseCacheRepository = databaseCacheRepository;
6873
_idKeyMap = idKeyMap;
@@ -74,8 +79,8 @@ public DocumentCacheService(
7479
_publishedModelFactory = publishedModelFactory;
7580
_previewService = previewService;
7681
_publishStatusQueryService = publishStatusQueryService;
77-
_documentNavigationQueryService = documentNavigationQueryService;
7882
_cacheSettings = cacheSettings.Value;
83+
_logger = logger;
7984
}
8085

8186
public async Task<IPublishedContent?> GetByKeyAsync(Guid key, bool? preview = null)
@@ -185,44 +190,64 @@ public async Task RemoveFromMemoryCacheAsync(Guid key)
185190

186191
/// <summary>
/// Seeds the cache with the documents identified by the seed keys. Keys that are not in the cache yet
/// are loaded from the database in batches and written to the cache.
/// </summary>
/// <param name="cancellationToken">
/// Stops the seeding between keys and between batches once cancellation is requested; it is also passed
/// to every cache write.
/// </param>
public async Task SeedAsync(CancellationToken cancellationToken)
{
#if DEBUG
    var sw = new Stopwatch();
    sw.Start();
#endif

    const int GroupSize = 100;
    foreach (IEnumerable<Guid> group in SeedKeys.InGroupsOf(GroupSize))
    {
        var uncachedKeys = new HashSet<Guid>();
        foreach (Guid key in group)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            var cacheKey = GetCacheKey(key, false);

            // Probe with the node type that SetAsync stores below (rather than as object), so the
            // lookup asks for the entry as the same type it was written with.
            (bool existsInCache, _) = await _hybridCache.TryGetValueAsync<ContentCacheNode>(cacheKey);
            if (existsInCache is false)
            {
                uncachedKeys.Add(key);
            }
        }

        // The break above only leaves the inner loop. Stop here as well, so a cancelled seed
        // neither queries the database for a partial batch nor moves on to the next batch.
        if (cancellationToken.IsCancellationRequested)
        {
            break;
        }

        _logger.LogDebug("Uncached key count {KeyCount}", uncachedKeys.Count);

        if (uncachedKeys.Count == 0)
        {
            continue;
        }

        using ICoreScope scope = _scopeProvider.CreateCoreScope();

        // Materialize once, inside the scope: the result is both counted and iterated below, and a
        // lazily evaluated sequence would otherwise be produced twice.
        // We don't want to seed drafts.
        List<ContentCacheNode> cacheNodes = (await _databaseCacheRepository.GetContentSourcesAsync(uncachedKeys))
            .Where(node => node.IsDraft is false)
            .ToList();

        scope.Complete();

        _logger.LogDebug("Document nodes to cache {NodeCount}", cacheNodes.Count);

        foreach (ContentCacheNode cacheNode in cacheNodes)
        {
            var cacheKey = GetCacheKey(cacheNode.Key, false);
            await _hybridCache.SetAsync(
                cacheKey,
                cacheNode,
                GetSeedEntryOptions(),
                GenerateTags(cacheNode.Key),
                cancellationToken: cancellationToken);
        }
    }

#if DEBUG
    sw.Stop();
    _logger.LogInformation("Document cache seeding completed in {ElapsedMilliseconds} ms with {SeedCount} seed keys.", sw.ElapsedMilliseconds, SeedKeys.Count);
#else
    _logger.LogInformation("Document cache seeding completed with {SeedCount} seed keys.", SeedKeys.Count);
#endif
}
227252

228253
// Internal for test purposes.
@@ -256,16 +281,7 @@ public async Task<bool> HasContentByIdAsync(int id, bool preview = false)
256281
return false;
257282
}
258283

259-
ContentCacheNode? contentCacheNode = await _hybridCache.GetOrCreateAsync<ContentCacheNode?>(
260-
GetCacheKey(keyAttempt.Result, preview), // Unique key to the cache entry
261-
cancel => ValueTask.FromResult<ContentCacheNode?>(null));
262-
263-
if (contentCacheNode is null)
264-
{
265-
await _hybridCache.RemoveAsync(GetCacheKey(keyAttempt.Result, preview));
266-
}
267-
268-
return contentCacheNode is not null;
284+
return await _hybridCache.ExistsAsync(GetCacheKey(keyAttempt.Result, preview));
269285
}
270286

271287
public async Task RefreshContentAsync(IContent content)

0 commit comments

Comments
 (0)