Skip to content

Commit 3c91aad

Browse files
committed
Add all versions in version dropdown and filter out non-existing URLs
1 parent bce82f1 commit 3c91aad

File tree

13 files changed

+379
-7
lines changed

13 files changed

+379
-7
lines changed

Directory.Packages.props

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
<PackageVersion Include="FakeItEasy" Version="8.3.0" />
2020
<PackageVersion Include="Elastic.Ingest.Elasticsearch" Version="0.11.3" />
2121
<PackageVersion Include="Microsoft.OpenApi" Version="2.0.0-preview9" />
22+
<PackageVersion Include="Octokit" Version="14.0.0" />
2223
<PackageVersion Include="System.Text.Json" Version="9.0.5" />
24+
<PackageVersion Include="TestableIO.System.IO.Abstractions.Wrappers" Version="22.0.14" />
2325
</ItemGroup>
2426
<!-- Build -->
2527
<ItemGroup>

docs-builder.sln

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.ApiExplorer.Tests",
107107
EndProject
108108
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Site", "src\Elastic.Documentation.Site\Elastic.Documentation.Site.csproj", "{89B83007-71E6-4B57-BA78-2544BFA476DB}"
109109
EndProject
110+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.LegacyPageLookup", "src\Elastic.Documentation.LegacyPageLookup\Elastic.Documentation.LegacyPageLookup.csproj", "{111E7029-BB29-4039-9B45-04776798A8DD}"
111+
EndProject
112+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.LegacyPageLookup.Tests", "tests\Elastic.Documentation.LegacyPageLookup.Tests\Elastic.Documentation.LegacyPageLookup.Tests.csproj", "{E205D6BD-AC1E-4B2A-B462-8E59781833D8}"
113+
EndProject
110114
Global
111115
GlobalSection(SolutionConfigurationPlatforms) = preSolution
112116
Debug|Any CPU = Debug|Any CPU
@@ -184,6 +188,14 @@ Global
184188
{89B83007-71E6-4B57-BA78-2544BFA476DB}.Debug|Any CPU.Build.0 = Debug|Any CPU
185189
{89B83007-71E6-4B57-BA78-2544BFA476DB}.Release|Any CPU.ActiveCfg = Release|Any CPU
186190
{89B83007-71E6-4B57-BA78-2544BFA476DB}.Release|Any CPU.Build.0 = Release|Any CPU
191+
{111E7029-BB29-4039-9B45-04776798A8DD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
192+
{111E7029-BB29-4039-9B45-04776798A8DD}.Debug|Any CPU.Build.0 = Debug|Any CPU
193+
{111E7029-BB29-4039-9B45-04776798A8DD}.Release|Any CPU.ActiveCfg = Release|Any CPU
194+
{111E7029-BB29-4039-9B45-04776798A8DD}.Release|Any CPU.Build.0 = Release|Any CPU
195+
{E205D6BD-AC1E-4B2A-B462-8E59781833D8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
196+
{E205D6BD-AC1E-4B2A-B462-8E59781833D8}.Debug|Any CPU.Build.0 = Debug|Any CPU
197+
{E205D6BD-AC1E-4B2A-B462-8E59781833D8}.Release|Any CPU.ActiveCfg = Release|Any CPU
198+
{E205D6BD-AC1E-4B2A-B462-8E59781833D8}.Release|Any CPU.Build.0 = Release|Any CPU
187199
EndGlobalSection
188200
GlobalSection(NestedProjects) = preSolution
189201
{4D198E25-C211-41DC-9E84-B15E89BD7048} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
@@ -212,5 +224,7 @@ Global
212224
{C883AC18-7C6A-482E-A9D7-C44DF8633425} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
213225
{0331559E-4ED1-4A56-9C35-3EAD4D7E696D} = {67B576EE-02FA-4F9B-94BC-3630BC09ECE5}
214226
{89B83007-71E6-4B57-BA78-2544BFA476DB} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
227+
{111E7029-BB29-4039-9B45-04776798A8DD} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
228+
{E205D6BD-AC1E-4B2A-B462-8E59781833D8} = {67B576EE-02FA-4F9B-94BC-3630BC09ECE5}
215229
EndGlobalSection
216230
EndGlobal
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.Collections;
6+
using System.Text;
7+
8+
namespace Elastic.Documentation.LegacyPageLookup;
9+
10+
/// <summary>
/// A fixed-size Bloom filter over strings. Membership checks never report a stored
/// item as missing, but may report an item that was never stored as present.
/// The filter can be persisted to and restored from a small binary file.
/// </summary>
public class BloomFilter
{
    /// <summary>
    /// Leading URL segment that <see cref="Check"/> strips before hashing, so that
    /// absolute legacy paths line up with the relative keys stored in the filter.
    /// </summary>
    private const string LegacyPathPrefix = "/guide/en/";

    /// <summary>
    /// The bit array for the filter.
    /// </summary>
    private readonly BitArray _bitArray;

    /// <summary>
    /// The size of the bit array, in bits.
    /// </summary>
    private int Size => _bitArray.Length;

    /// <summary>
    /// The number of hash functions used.
    /// </summary>
    private int HashCount { get; }

    /// <summary>
    /// Wraps an already-populated bit array. Used by <see cref="Load"/>, which
    /// validates the parameters before calling this constructor.
    /// </summary>
    private BloomFilter(BitArray bitArray, int hashCount)
    {
        _bitArray = bitArray;
        HashCount = hashCount;
    }

    /// <summary>
    /// Initializes an empty filter with optimal parameters based on expected items and false positive probability.
    /// </summary>
    /// <param name="expectedItems">The expected number of items to be stored.</param>
    /// <param name="falsePositiveProbability">The desired false positive probability (e.g., 0.01 for 1%).</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="expectedItems"/> is not positive, <paramref name="falsePositiveProbability"/>
    /// is not strictly between 0 and 1, or the resulting bit array would not fit in an <see cref="int"/>.
    /// </exception>
    private BloomFilter(int expectedItems, double falsePositiveProbability)
    {
        if (expectedItems <= 0)
            throw new ArgumentOutOfRangeException(nameof(expectedItems), "Expected items must be greater than zero.");
        if (falsePositiveProbability is <= 0.0 or >= 1.0)
            throw new ArgumentOutOfRangeException(nameof(falsePositiveProbability), "False positive probability must be between 0 and 1.");

        var size = GetOptimalSize(expectedItems, falsePositiveProbability);
        var hashCount = GetOptimalHashCount(size, expectedItems);

        _bitArray = new BitArray(size);
        HashCount = hashCount;
    }

    /// <summary>
    /// Adds an item to the Bloom Filter.
    /// </summary>
    /// <param name="item">The item to add. The string will be UTF-8 encoded for hashing.</param>
    private void Add(string item)
    {
        var itemBytes = Encoding.UTF8.GetBytes(item);
        for (var i = 0; i < HashCount; i++)
            _bitArray[GetHash(itemBytes, i)] = true;
    }

    /// <summary>
    /// Checks if an item is possibly in the set.
    /// </summary>
    /// <param name="item">
    /// The item to check. A leading <c>/guide/en/</c> is removed before hashing;
    /// the rest of the string is used verbatim.
    /// </param>
    /// <returns>False if the item is definitely not in the set, True if it might be.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="item"/> is null.</exception>
    public bool Check(string item)
    {
        ArgumentNullException.ThrowIfNull(item);

        // Only strip the prefix at the start. Removing the segment everywhere would rewrite
        // keys that legitimately contain it and make stored items impossible to find.
        var key = item.StartsWith(LegacyPathPrefix, StringComparison.Ordinal)
            ? item[LegacyPathPrefix.Length..]
            : item;

        var itemBytes = Encoding.UTF8.GetBytes(key);
        for (var i = 0; i < HashCount; i++)
        {
            if (!_bitArray[GetHash(itemBytes, i)])
                return false;
        }

        return true;
    }

    /// <summary>
    /// Maps <paramref name="data"/> to a bit index using the block-mixing loop of
    /// 32-bit MurmurHash3, seeded with <paramref name="seed"/>.
    /// </summary>
    /// <remarks>
    /// This is intentionally NOT a full MurmurHash3: trailing bytes that do not fill a
    /// 4-byte block are ignored and no final avalanche step is applied. The bits written by
    /// <see cref="Save"/> depend on this exact function, so changing its output invalidates
    /// every previously saved filter file.
    /// </remarks>
    private int GetHash(byte[] data, int seed)
    {
        const uint c1 = 0xcc9e2d51;
        const uint c2 = 0x1b873593;
        const int r1 = 15;
        const int r2 = 13;
        const uint m = 5;
        const uint n = 0xe6546b64;

        var hash = (uint)seed;
        var blockCount = data.Length / 4;

        for (var i = 0; i < blockCount; i++)
        {
            // Read blocks as little-endian explicitly so the hash (and therefore the
            // persisted bit pattern) does not depend on the host's byte order.
            var k = System.Buffers.Binary.BinaryPrimitives.ReadUInt32LittleEndian(data.AsSpan(i * 4, 4));
            k *= c1;
            k = (k << r1) | (k >> (32 - r1));
            k *= c2;

            hash ^= k;
            hash = (hash << r2) | (hash >> (32 - r2));
            hash = (hash * m) + n;
        }

        // hash is unsigned, so a plain modulo already yields a valid non-negative index.
        return (int)(hash % (uint)_bitArray.Length);
    }

    /// <summary>
    /// Creates a new BloomFilter from a collection of items.
    /// </summary>
    /// <param name="items">The collection of string items to add.</param>
    /// <param name="falsePositiveProbability">The desired false positive probability.</param>
    /// <returns>A new BloomFilter instance populated with the items.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="items"/> is empty or <paramref name="falsePositiveProbability"/> is not strictly between 0 and 1.
    /// </exception>
    public static BloomFilter FromCollection(IEnumerable<string> items, double falsePositiveProbability)
    {
        ArgumentNullException.ThrowIfNull(items);

        // Materialize once: the count sizes the filter and the items are enumerated again to populate it.
        var itemList = new List<string>(items);
        var filter = new BloomFilter(itemList.Count, falsePositiveProbability);
        foreach (var item in itemList)
            filter.Add(item);

        return filter;
    }

    // --- Persistence Methods ---

    /// <summary>
    /// Saves the Bloom Filter's state to a binary file.
    /// The format is: [4-byte Size int][4-byte HashCount int][bit array bytes]
    /// </summary>
    /// <param name="filePath">The path to the file. An existing file is overwritten.</param>
    public void Save(string filePath)
    {
        using var stream = File.Open(filePath, FileMode.Create);
        using var writer = new BinaryWriter(stream);

        // 1. Write the Size and HashCount as integers
        writer.Write(Size);
        writer.Write(HashCount);

        // 2. Write the bit array (bit i is stored in byte i / 8, at bit position i % 8)
        var bitArrayBytes = new byte[GetByteCount(Size)];
        _bitArray.CopyTo(bitArrayBytes, 0);
        writer.Write(bitArrayBytes);
    }

    /// <summary>
    /// Loads a Bloom Filter from a binary file written by <see cref="Save"/>.
    /// </summary>
    /// <param name="filePath">The path to the file containing the filter data.</param>
    /// <returns>A new BloomFilter instance.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The stored size or hash count is not positive.</exception>
    /// <exception cref="EndOfStreamException">The file ends before the full header or bit array could be read.</exception>
    public static BloomFilter Load(string filePath)
    {
        using var stream = File.OpenRead(filePath);
        using var reader = new BinaryReader(stream);

        // 1. Read and validate metadata (Size and HashCount)
        var size = reader.ReadInt32();
        var hashCount = reader.ReadInt32();
        if (size <= 0)
            throw new ArgumentOutOfRangeException(nameof(size), "Size must be greater than zero.");
        if (hashCount <= 0)
            throw new ArgumentOutOfRangeException(nameof(hashCount), "Hash count must be greater than zero.");

        // 2. Make sure the payload is complete before allocating a buffer for it,
        //    so a truncated or corrupt file fails with a clear error.
        var byteCount = GetByteCount(size);
        if (stream.Length - stream.Position < byteCount)
            throw new EndOfStreamException($"Bloom filter file '{filePath}' is truncated: expected {byteCount} bytes of bit data.");

        // 3. Rebuild the bit array directly from the bytes; trimming Length drops the padding bits of the last byte.
        var bitArrayBytes = reader.ReadBytes(byteCount);
        var bits = new BitArray(bitArrayBytes) { Length = size };

        return new BloomFilter(bits, hashCount);
    }

    /// <summary>
    /// Number of bytes needed to hold <paramref name="bitCount"/> bits (for bitCount &gt;= 1),
    /// computed without overflowing near <see cref="int.MaxValue"/>.
    /// </summary>
    private static int GetByteCount(int bitCount) => ((bitCount - 1) / 8) + 1;

    // --- Optimal Parameter Calculation ---

    /// <summary>
    /// Calculates the optimal size of the bit array (m).
    /// Formula: m = - (n * log(p)) / (log(2)^2)
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The computed size does not fit in an <see cref="int"/>.</exception>
    private static int GetOptimalSize(int n, double p)
    {
        var bits = Math.Ceiling(-1 * (n * Math.Log(p)) / Math.Pow(Math.Log(2), 2));
        if (bits > int.MaxValue)
            throw new ArgumentOutOfRangeException(nameof(n), "The requested item count and false positive probability need more bits than a single filter can hold.");

        return (int)bits;
    }

    /// <summary>
    /// Calculates the optimal number of hash functions (k).
    /// Formula: k = (m/n) * log(2)
    /// </summary>
    private static int GetOptimalHashCount(int m, int n) => (int)Math.Ceiling((double)m / n * Math.Log(2));
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>net9.0</TargetFramework>
5+
<ImplicitUsings>enable</ImplicitUsings>
6+
<Nullable>enable</Nullable>
7+
</PropertyGroup>
8+
9+
<ItemGroup>
10+
<PackageReference Include="System.IO.Abstractions" />
11+
</ItemGroup>
12+
13+
<ItemGroup>
14+
<ProjectReference Include="..\Elastic.Documentation.Configuration\Elastic.Documentation.Configuration.csproj" />
15+
</ItemGroup>
16+
17+
<ItemGroup>
18+
<EmbeddedResource Include="legacy-pages.bloom.bin" />
19+
</ItemGroup>
20+
21+
</Project>
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.IO.Abstractions;
6+
using Elastic.Documentation.Configuration;
7+
8+
namespace Elastic.Documentation.LegacyPageLookup;
9+
10+
/// <summary>
/// Answers "did this legacy page path exist?" using a Bloom filter stored in
/// <c>legacy-pages.bloom.bin</c> inside this project's source directory.
/// Because the answer comes from <see cref="BloomFilter.Check"/>, a <c>true</c>
/// result means the page might exist, while <c>false</c> means it definitely does not.
/// </summary>
public class LegacyPageLookup(IFileSystem fs)
{
    // Loaded on first use by PathExists and replaced by GenerateBloomFilterBinary.
    private BloomFilter? _bloomFilter;

    // Location of the binary relative to the working-directory root reported by Paths.
    private readonly string _bloomFilterBinaryPath = Path.Combine(Paths.WorkingDirectoryRoot.FullName, "src", "Elastic.Documentation.LegacyPageLookup", "legacy-pages.bloom.bin");

    /// <summary>
    /// Checks whether <paramref name="path"/> is possibly one of the known legacy pages.
    /// The filter file is read the first time this method is called on an instance.
    /// </summary>
    /// <param name="path">The page path to look up.</param>
    /// <returns>False if the page is definitely unknown, true if it might exist.</returns>
    public bool PathExists(string path)
    {
        _bloomFilter ??= LoadBloomFilter();
        return _bloomFilter.Check(path);
    }

    /// <summary>
    /// Reads the Bloom filter from the binary file. Caching is left to the caller.
    /// </summary>
    private BloomFilter LoadBloomFilter()
    {
        var bloomFilterBinaryInfo = fs.FileInfo.New(_bloomFilterBinaryPath);
        return BloomFilter.Load(bloomFilterBinaryInfo.FullName);
    }

    /// <summary>
    /// Rebuilds the Bloom filter binary from the pages reported by <paramref name="pagesProvider"/>
    /// and writes the number of distinct pages to the console.
    /// </summary>
    /// <param name="pagesProvider">Source of the page paths to store in the filter.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pagesProvider"/> is null.</exception>
    public void GenerateBloomFilterBinary(IPagesProvider pagesProvider)
    {
        ArgumentNullException.ThrowIfNull(pagesProvider);

        // De-duplicate first so the filter is sized for the number of distinct pages,
        // which is also the number reported below.
        var paths = pagesProvider.GetPages().ToHashSet();
        var bloomFilter = BloomFilter.FromCollection(paths, 0.001);
        Console.WriteLine(paths.Count);
        bloomFilter.Save(_bloomFilterBinaryPath);

        // Keep this instance consistent with the file it just wrote.
        _bloomFilter = bloomFilter;
    }
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
6+
using Elastic.Documentation.Configuration;
7+
8+
namespace Elastic.Documentation.LegacyPageLookup;
9+
10+
/// <summary>
/// Supplies the page paths that are stored in the legacy page Bloom filter.
/// </summary>
public interface IPagesProvider
{
    /// <summary>
    /// Returns the known page paths.
    /// </summary>
    IEnumerable<string> GetPages();
}

/// <summary>
/// Lists the HTML pages found under <c>html/en</c> of a local repository checkout.
/// </summary>
public class LocalPagesProvider(string gitRepositoryPath) : IPagesProvider
{
    /// <summary>
    /// Enumerates every <c>*.html</c> file below <c>{gitRepositoryPath}/html/en</c>, including
    /// subdirectories, and yields each path relative to that directory with
    /// backslashes replaced by forward slashes.
    /// </summary>
    public IEnumerable<string> GetPages()
    {
        var htmlRoot = Path.Combine(gitRepositoryPath, "html", "en");
        var htmlFiles = Directory.EnumerateFiles(htmlRoot, "*.html", SearchOption.AllDirectories);

        return htmlFiles.Select(file => Path.GetRelativePath(htmlRoot, file).Replace('\\', '/'));
    }
}
Binary file not shown.

src/Elastic.Markdown/Elastic.Markdown.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
</ItemGroup>
2929

3030
<ItemGroup>
31+
<ProjectReference Include="..\Elastic.Documentation.LegacyPageLookup\Elastic.Documentation.LegacyPageLookup.csproj" />
3132
<ProjectReference Include="..\Elastic.Documentation.Site\Elastic.Documentation.Site.csproj" />
3233
<ProjectReference Include="..\Elastic.Documentation.LinkIndex\Elastic.Documentation.LinkIndex.csproj" />
3334
<ProjectReference Include="..\Elastic.Documentation\Elastic.Documentation.csproj" />

src/Elastic.Markdown/Slices/HtmlWriter.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using Elastic.Documentation;
77
using Elastic.Documentation.Configuration.Builder;
88
using Elastic.Documentation.Legacy;
9+
using Elastic.Documentation.LegacyPageLookup;
910
using Elastic.Documentation.Site.FileProviders;
1011
using Elastic.Documentation.Site.Navigation;
1112
using Elastic.Markdown.Extensions.DetectionRules;
@@ -85,6 +86,8 @@ private async Task<string> RenderLayout(MarkdownFile markdown, MarkdownDocument
8586
.Distinct()
8687
.ToHashSet();
8788

89+
var legacyPageLookup = new LegacyPageLookup(new FileSystem());
90+
8891
var slice = Index.Create(new IndexViewModel
8992
{
9093
SiteName = siteName,
@@ -111,7 +114,8 @@ private async Task<string> RenderLayout(MarkdownFile markdown, MarkdownDocument
111114
StaticFileContentHashProvider = StaticFileContentHashProvider,
112115
ReportIssueUrl = reportUrl,
113116
CurrentVersion = legacyPages.Count > 0 ? legacyPages.ElementAt(0).Version : "9.0+",
114-
LegacyPages = legacyPages.Count > 1 ? [legacyPages.ElementAt(1)] : [],
117+
LegacyPages = legacyPages.Skip(1).Where(
118+
l => Uri.TryCreate(l.ToString(), UriKind.Absolute, out var uri) && legacyPageLookup.PathExists(uri.AbsolutePath)).ToArray(),
115119
VersionDropdownItems = VersionDrownDownItemViewModel.FromLegacyPageMappings(legacyPages.Skip(1).ToArray()),
116120
Products = allProducts
117121
});

0 commit comments

Comments
 (0)