|
2 | 2 | // Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
|
3 | 3 | // See the LICENSE file in the project root for more information
|
4 | 4 |
|
5 |
| -using System.Diagnostics; |
6 |
| -using System.Text; |
| 5 | +using System.Collections.Concurrent; |
7 | 6 | using Amazon.Lambda.Core;
|
8 | 7 | using Amazon.Lambda.RuntimeSupport;
|
| 8 | +using Amazon.Lambda.Serialization.SystemTextJson; |
| 9 | +using Amazon.Lambda.SQSEvents; |
9 | 10 | using Amazon.S3;
|
10 |
| -using Amazon.S3.Model; |
| 11 | +using Amazon.S3.Util; |
| 12 | +using Elastic.Documentation.Lambda.LinkIndexUploader; |
11 | 13 | using Elastic.Markdown.IO.State;
|
12 | 14 | using Elastic.Markdown.Links.CrossLinks;
|
13 | 15 |
|
// S3 bucket that holds the per-repository links.json files and the
// aggregated link index document this Lambda maintains.
const string bucketName = "elastic-docs-link-index";

// Object key of the aggregated index document written back to the bucket.
const string indexFile = "link-index.json";

// Bootstrap the Lambda runtime: incoming SQS events are deserialized with the
// source-generated System.Text.Json serializer and dispatched to Handler,
// which reports partial batch failures via SQSBatchResponse.
await LambdaBootstrapBuilder
	.Create<SQSEvent, SQSBatchResponse>(Handler, new SourceGeneratorLambdaJsonSerializer<SerializerContext>())
	.Build()
	.RunAsync();

// Explicitly end the top-level statements; only local function declarations follow.
return;
22 | 24 |
|
23 |
// The SQS queue is configured to trigger when elastic/*/*/links.json files are created or updated.
/// <summary>
/// Processes a batch of SQS messages, each wrapping one or more S3 event notification
/// records for updated links.json files. For every record the corresponding
/// <c>LinkReference</c> is fetched, converted to a <c>LinkIndexEntry</c>, and merged
/// into the link index, which is saved once at the end of the batch.
/// </summary>
/// <param name="ev">The SQS event containing the batch of messages to process.</param>
/// <param name="context">The Lambda invocation context (used for logging).</param>
/// <returns>
/// An <see cref="SQSBatchResponse"/> listing the message IDs that failed and should be
/// returned to the queue for retry; empty on full success.
/// </returns>
static async Task<SQSBatchResponse> Handler(SQSEvent ev, ILambdaContext context)
{
	// Fix: dispose the S3 client when the invocation completes — the original
	// instantiated it without disposing, leaking the underlying HTTP resources.
	using var s3Client = new AmazonS3Client();
	var linkIndexProvider = new LinkIndexProvider(s3Client, context.Logger, bucketName, indexFile);
	var batchItemFailures = new List<SQSBatchResponse.BatchItemFailure>();
	foreach (var message in ev.Records)
	{
		try
		{
			var s3RecordLinkReferenceTuples = await GetS3RecordLinkReferenceTuples(s3Client, message, context);
			foreach (var (s3Record, linkReference) in s3RecordLinkReferenceTuples)
			{
				var newEntry = ConvertToLinkIndexEntry(s3Record, linkReference);
				await linkIndexProvider.UpdateLinkIndexEntry(newEntry);
			}
		}
		catch (Exception e)
		{
			// Add failed message identifier to the batchItemFailures list so only
			// this message is retried; the rest of the batch continues.
			context.Logger.LogWarning(e, "Failed to process message {MessageId}", message.MessageId);
			batchItemFailures.Add(new SQSBatchResponse.BatchItemFailure
			{
				ItemIdentifier = message.MessageId
			});
		}
	}
	try
	{
		// Persist the merged index once per batch rather than per message.
		await linkIndexProvider.Save();
		var response = new SQSBatchResponse(batchItemFailures);
		if (batchItemFailures.Count > 0)
			context.Logger.LogInformation("Failed to process {batchItemFailuresCount} of {allMessagesCount} messages. Returning them to the queue.", batchItemFailures.Count, ev.Records.Count);
		return response;
	}
	catch (Exception ex)
	{
		// If we fail to update the link index, we need to return all messages to the queue
		// so that they can be retried later.
		context.Logger.LogError("Failed to update {bucketName}/{indexFile}. Returning all {recordCount} messages to the queue.", bucketName, indexFile, ev.Records.Count);
		context.Logger.LogError(ex, ex.Message);
		var response = new SQSBatchResponse(ev.Records.Select(r => new SQSBatchResponse.BatchItemFailure
		{
			ItemIdentifier = r.MessageId
		}).ToList());
		return response;
	}
}
|
107 | 73 |
|
108 |
/// <summary>
/// Builds a <c>LinkIndexEntry</c> from an S3 event notification record and the
/// link reference read from the notified object. The object key is expected to
/// follow the "elastic/{repository}/{branch}/..." layout the queue filter enforces.
/// </summary>
/// <param name="record">The S3 event notification record for the updated links.json object.</param>
/// <param name="linkReference">The deserialized link reference for that object.</param>
/// <returns>The index entry describing the repository/branch state of the object.</returns>
static LinkIndexEntry ConvertToLinkIndexEntry(S3EventNotification.S3EventNotificationRecord record, LinkReference linkReference)
{
	var s3Object = record.S3.Object;
	var keyTokens = s3Object.Key.Split('/');
	// Fix: the original indexed keyTokens[1]/[2] unconditionally and would throw a
	// bare IndexOutOfRangeException on an unexpected key; fail with a descriptive
	// error instead (caught per-message by the handler, so the message is retried).
	if (keyTokens.Length < 3)
		throw new InvalidOperationException($"Unexpected S3 object key '{s3Object.Key}'; expected 'elastic/{{repository}}/{{branch}}/...'");
	var repository = keyTokens[1];
	var branch = keyTokens[2];
	return new LinkIndexEntry
	{
		Repository = repository,
		Branch = branch,
		ETag = s3Object.ETag,
		Path = s3Object.Key,
		UpdatedAt = record.EventTime,
		GitReference = linkReference.Origin.Ref
	};
}
| 90 | + |
/// <summary>
/// Parses the SQS message body as an S3 event notification and, for each record,
/// fetches the associated <c>LinkReference</c>. Records are processed concurrently.
/// </summary>
/// <param name="s3Client">The S3 client used to read the referenced objects.</param>
/// <param name="message">The SQS message whose body is an S3 event notification JSON document.</param>
/// <param name="context">The Lambda invocation context (used for logging).</param>
/// <returns>The (record, link reference) pairs for every record in the notification.</returns>
static async Task<IReadOnlyCollection<(S3EventNotification.S3EventNotificationRecord, LinkReference)>> GetS3RecordLinkReferenceTuples(IAmazonS3 s3Client,
	SQSEvent.SQSMessage message, ILambdaContext context)
{
	var notification = S3EventNotification.ParseJson(message.Body);
	var referenceProvider = new LinkReferenceProvider(s3Client, context.Logger, bucketName);
	// ConcurrentBag because the records are fetched in parallel below.
	var pairs = new ConcurrentBag<(S3EventNotification.S3EventNotificationRecord, LinkReference)>();
	await Parallel.ForEachAsync(notification.Records, async (s3Record, token) =>
	{
		var reference = await referenceProvider.GetLinkReference(s3Record.S3.Object.Key, token);
		pairs.Add((s3Record, reference));
	});
	return pairs;
}
|
0 commit comments