// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Collections.Concurrent;
using Amazon.Lambda.Core;
using Amazon.Lambda.RuntimeSupport;
using Amazon.Lambda.Serialization.SystemTextJson;
using Amazon.Lambda.SQSEvents;
using Amazon.S3;
using Amazon.S3.Util;
using Elastic.Documentation.Lambda.LinkIndexUploader;
using Elastic.Markdown.IO.State;
using Elastic.Markdown.Links.CrossLinks;

// Target bucket holding the per-repository links.json files and the aggregated index.
const string bucketName = "elastic-docs-link-index";
// Key of the aggregated index object this lambda maintains.
const string indexFile = "link-index.json";

// Bootstrap the Lambda runtime with a strongly-typed SQS handler.
// The source-generated serializer (SerializerContext) avoids reflection-based JSON
// (de)serialization, which matters for Lambda cold-start time — TODO confirm
// SerializerContext registers SQSEvent and SQSBatchResponse.
await LambdaBootstrapBuilder.Create<SQSEvent, SQSBatchResponse>(Handler, new SourceGeneratorLambdaJsonSerializer<SerializerContext>())
	.Build()
	.RunAsync();

// End of top-level statements; only local function declarations follow.
return;
|
// The SQS queue is configured to trigger when elastic/*/*/links.json files are created or updated.
// For each SQS message: parse the embedded S3 event, fetch the referenced links.json files,
// fold them into the link index, then persist the index exactly once at the end.
// Returns a partial-batch response so SQS redelivers only the messages that failed;
// if persisting the index itself fails, every message is returned to the queue.
static async Task<SQSBatchResponse> Handler(SQSEvent ev, ILambdaContext context)
{
	// AmazonS3Client is IDisposable; dispose it when the invocation completes so the
	// underlying HTTP handler and sockets are released.
	using var s3Client = new AmazonS3Client();
	var linkIndexProvider = new LinkIndexProvider(s3Client, context.Logger, bucketName, indexFile);
	var batchItemFailures = new List<SQSBatchResponse.BatchItemFailure>();
	foreach (var message in ev.Records)
	{
		try
		{
			var s3RecordLinkReferenceTuples = await GetS3RecordLinkReferenceTuples(s3Client, message, context);
			foreach (var (s3Record, linkReference) in s3RecordLinkReferenceTuples)
			{
				var newEntry = ConvertToLinkIndexEntry(s3Record, linkReference);
				await linkIndexProvider.UpdateLinkIndexEntry(newEntry);
			}
		}
		catch (Exception e)
		{
			// Add failed message identifier to the batchItemFailures list so only this
			// message is redelivered (partial batch response).
			context.Logger.LogWarning(e, "Failed to process message {MessageId}", message.MessageId);
			batchItemFailures.Add(new SQSBatchResponse.BatchItemFailure
			{
				ItemIdentifier = message.MessageId
			});
		}
	}
	try
	{
		// Persist the updated index once, after all messages in the batch were applied.
		await linkIndexProvider.Save();
		var response = new SQSBatchResponse(batchItemFailures);
		if (batchItemFailures.Count > 0)
			context.Logger.LogInformation("Failed to process {batchItemFailuresCount} of {allMessagesCount} messages. Returning them to the queue.", batchItemFailures.Count, ev.Records.Count);
		return response;
	}
	catch (Exception ex)
	{
		// If we fail to update the link index, we need to return all messages to the queue
		// so that they can be retried later.
		context.Logger.LogError("Failed to update {bucketName}/{indexFile}. Returning all {recordCount} messages to the queue.", bucketName, indexFile, ev.Records.Count);
		context.Logger.LogError(ex, ex.Message);
		var response = new SQSBatchResponse(ev.Records.Select(r => new SQSBatchResponse.BatchItemFailure
		{
			ItemIdentifier = r.MessageId
		}).ToList());
		return response;
	}
}
107 | 73 |
|
// Builds the LinkIndexEntry for one S3 event record, combining the object's metadata with
// the git reference from the deserialized links.json (LinkReference).
// Expects keys shaped like "elastic/{repository}/{branch}/links.json".
static LinkIndexEntry ConvertToLinkIndexEntry(S3EventNotification.S3EventNotificationRecord record, LinkReference linkReference)
{
	var s3Object = record.S3.Object;
	var keyTokens = s3Object.Key.Split('/');
	// The queue filter should only admit elastic/*/*/links.json keys, but guard anyway so a
	// malformed key fails with a descriptive error (caught per-message by the handler)
	// instead of a bare IndexOutOfRangeException.
	if (keyTokens.Length < 3)
		throw new InvalidOperationException($"Unexpected S3 object key '{s3Object.Key}'; expected 'elastic/{{repository}}/{{branch}}/...'.");
	var repository = keyTokens[1];
	var branch = keyTokens[2];
	return new LinkIndexEntry
	{
		Repository = repository,
		Branch = branch,
		ETag = s3Object.ETag,
		Path = s3Object.Key,
		UpdatedAt = record.EventTime,
		GitReference = linkReference.Origin.Ref
	};
}
| 90 | + |
// Parses the SQS message body as an S3 event notification and resolves, concurrently, the
// LinkReference (deserialized links.json content) for every S3 record it contains.
static async Task<IReadOnlyCollection<(S3EventNotification.S3EventNotificationRecord, LinkReference)>> GetS3RecordLinkReferenceTuples(IAmazonS3 s3Client,
	SQSEvent.SQSMessage message, ILambdaContext context)
{
	var notification = S3EventNotification.ParseJson(message.Body);
	var referenceProvider = new LinkReferenceProvider(s3Client, context.Logger, bucketName);
	// ConcurrentBag because the parallel loop's bodies append from multiple threads.
	var pairs = new ConcurrentBag<(S3EventNotification.S3EventNotificationRecord, LinkReference)>();
	await Parallel.ForEachAsync(notification.Records, async (s3Record, token) =>
	{
		var reference = await referenceProvider.GetLinkReference(s3Record.S3.Object.Key, token);
		pairs.Add((s3Record, reference));
	});
	return pairs;
}
0 commit comments