Skip to content

Commit 28269ae

Browse files
committed
elias-fano, roaring bitmaps
1 parent e1fcf8a commit 28269ae

19 files changed

Lines changed: 3165 additions & 0 deletions
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
using System;
2+
using System.IO;
3+
using System.Runtime.InteropServices;
4+
using Infidex.Indexing.Compression;
5+
using Microsoft.VisualStudio.TestTools.UnitTesting;
6+
7+
namespace Infidex.Tests;
8+
9+
/// <summary>
/// Unit tests for <see cref="CompactArray"/>: element round-tripping through
/// <c>Create</c>/<c>Get</c>, the reported <c>Width</c>/<c>Count</c>, and the binary
/// format produced by <c>Write</c> and consumed by <c>Read</c>.
/// </summary>
[TestClass]
public class CompactArrayTests
{
    /// <summary>
    /// Small mixed values must be returned unchanged, and the reported width must
    /// be 7 (the largest value, 100, fits in 7 bits).
    /// </summary>
    [TestMethod]
    public void TestBasicEncodingDecoding()
    {
        long[] values = [5, 2, 9, 100, 0, 5, 10, 90, 9, 1, 65, 10];

        CompactArray arr = CompactArray.Create(values);

        Assert.AreEqual(values.Length, arr.Count);
        Assert.AreEqual(7, arr.Width);
        AssertValuesMatch(values, arr);
    }

    /// <summary>
    /// An empty input yields a count of 0 and still reports a width of 1.
    /// </summary>
    [TestMethod]
    public void TestEmpty()
    {
        long[] values = [];

        CompactArray arr = CompactArray.Create(values);

        Assert.AreEqual(0, arr.Count);
        Assert.AreEqual(1, arr.Width);
    }

    /// <summary>
    /// An all-zero input reports a width of 1 and returns zero for every element.
    /// </summary>
    [TestMethod]
    public void TestZeroes()
    {
        long[] values = [0, 0, 0, 0];

        CompactArray arr = CompactArray.Create(values);

        Assert.AreEqual(4, arr.Count);
        Assert.AreEqual(1, arr.Width);

        for (int i = 0; i < values.Length; i++)
        {
            Assert.AreEqual(0UL, arr.Get(i), $"Mismatch at index {i}");
        }
    }

    /// <summary>
    /// Values that use all 64 bits (including <see cref="ulong.MaxValue"/> reinterpreted
    /// as a <see cref="long"/>) must report a width of 64 and round-trip bit-for-bit.
    /// </summary>
    [TestMethod]
    public void TestLargeValues()
    {
        long[] values = [unchecked((long)ulong.MaxValue), 0, unchecked((long)(ulong.MaxValue >> 1)), 1234567890123456789];

        CompactArray arr = CompactArray.Create(values);

        Assert.AreEqual(64, arr.Width);
        AssertValuesMatch(values, arr);
    }

    /// <summary>
    /// Values with bit 32 set need at least 33 bits each.
    /// NOTE(review): presumably this makes elements straddle storage-word boundaries;
    /// confirm against CompactArray's packing layout.
    /// </summary>
    [TestMethod]
    public void TestBoundaryCrossing()
    {
        long[] values = [1L << 32, (1L << 32) | 1, 12345];

        CompactArray arr = CompactArray.Create(values);

        Assert.IsTrue(arr.Width >= 33);
        AssertValuesMatch(values, arr);
    }

    /// <summary>
    /// An array written with <c>Write</c> and loaded with <c>Read</c> must report the
    /// same count and width and return the same elements as the original.
    /// </summary>
    [TestMethod]
    public void TestSerialization()
    {
        long[] values = [5, 2, 9, 100, 0, 5, 10, 90, 9, 1, 65, 10];
        CompactArray original = CompactArray.Create(values);

        using MemoryStream ms = new MemoryStream();
        using BinaryWriter writer = new BinaryWriter(ms);
        original.Write(writer);
        // Make sure everything reached the stream before rewinding it.
        writer.Flush();

        ms.Position = 0;
        using BinaryReader reader = new BinaryReader(ms);
        CompactArray loaded = CompactArray.Read(reader);

        Assert.AreEqual(original.Count, loaded.Count);
        Assert.AreEqual(original.Width, loaded.Width);

        for (int i = 0; i < values.Length; i++)
        {
            Assert.AreEqual(original.Get(i), loaded.Get(i), $"Mismatch at index {i}");
        }
    }

    /// <summary>
    /// Cross-checks <c>Write</c>/<c>Read</c> against a plain field-by-field encoding
    /// (width, count, data length, then each data word):
    /// <list type="number">
    /// <item>bytes written field-by-field must be loadable by <c>Read</c>;</item>
    /// <item>bytes produced by <c>Write</c> must be decodable field-by-field;</item>
    /// <item>on little-endian hosts both encodings must be byte-identical.</item>
    /// </list>
    /// </summary>
    [TestMethod]
    public void TestOptimizedSerializationIntegrity()
    {
        long[] values = new long[1000];
        for (int i = 0; i < values.Length; i++)
        {
            values[i] = (long)i * 123456789;
        }

        CompactArray original = CompactArray.Create(values);

        // 1. Field-by-field bytes -> CompactArray.Read.
        using (MemoryStream ms = new MemoryStream())
        using (BinaryWriter writer = new BinaryWriter(ms))
        {
            WriteFieldByField(writer, original);
            writer.Flush();

            ms.Position = 0;
            using (BinaryReader reader = new BinaryReader(ms))
            {
                CompactArray loaded = CompactArray.Read(reader);

                Assert.AreEqual(original.Width, loaded.Width);
                Assert.AreEqual(original.Count, loaded.Count);
                AssertValuesMatch(values, loaded);
            }
        }

        // 2. CompactArray.Write bytes -> field-by-field decoding.
        using (MemoryStream ms = new MemoryStream())
        using (BinaryWriter writer = new BinaryWriter(ms))
        {
            original.Write(writer);
            writer.Flush();

            ms.Position = 0;
            using (BinaryReader reader = new BinaryReader(ms))
            {
                int width = reader.ReadInt32();
                int count = reader.ReadInt32();
                int dataLen = reader.ReadInt32();

                Assert.AreEqual(original.Width, width);
                Assert.AreEqual(original.Count, count);
                Assert.AreEqual(original.Data.Length, dataLen);

                ulong[] data = new ulong[dataLen];
                for (int i = 0; i < dataLen; i++)
                {
                    data[i] = reader.ReadUInt64();
                }

                CompactArray loaded = new CompactArray(data, width, count);
                AssertValuesMatch(values, loaded);
            }
        }

        // 3. Byte-for-byte comparison, only attempted on little-endian hosts.
        // NOTE(review): presumably Write has a fast path whose output depends on host
        // byte order; confirm against CompactArray.Write.
        if (BitConverter.IsLittleEndian)
        {
            byte[] manualBytes;
            byte[] optimizedBytes;

            using (MemoryStream ms = new MemoryStream())
            using (BinaryWriter writer = new BinaryWriter(ms))
            {
                WriteFieldByField(writer, original);
                writer.Flush();
                manualBytes = ms.ToArray();
            }

            using (MemoryStream ms = new MemoryStream())
            using (BinaryWriter writer = new BinaryWriter(ms))
            {
                original.Write(writer);
                writer.Flush();
                optimizedBytes = ms.ToArray();
            }

            CollectionAssert.AreEqual(manualBytes, optimizedBytes, "Optimized serialization produced different bytes than standard loop!");
        }
    }

    /// <summary>
    /// Writes <paramref name="array"/> one field at a time: width, count, data length,
    /// then every data word in order.
    /// </summary>
    private static void WriteFieldByField(BinaryWriter writer, CompactArray array)
    {
        writer.Write(array.Width);
        writer.Write(array.Count);
        writer.Write(array.Data.Length);

        for (int i = 0; i < array.Data.Length; i++)
        {
            writer.Write(array.Data[i]);
        }
    }

    /// <summary>
    /// Asserts that <paramref name="actual"/> returns, for every index, the bit pattern of
    /// the corresponding entry in <paramref name="expected"/>, reporting the failing index.
    /// </summary>
    private static void AssertValuesMatch(long[] expected, CompactArray actual)
    {
        for (int i = 0; i < expected.Length; i++)
        {
            // Reinterpret rather than range-check: negative longs stand for large ulongs.
            Assert.AreEqual(unchecked((ulong)expected[i]), actual.Get(i), $"Mismatch at index {i}");
        }
    }
}

src/Infidex.Tests/DArrayTests.cs

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
using Infidex.Indexing.Compression;
2+
3+
namespace Infidex.Tests;
4+
5+
/// <summary>
/// Unit tests for <see cref="DArray"/>: select queries over a <see cref="BitSet"/>
/// for both set bits (<c>select1: true</c>) and clear bits (<c>select1: false</c>),
/// plus a <c>Write</c>/<c>Read</c> round trip.
/// </summary>
[TestClass]
public class DArrayTests
{
    /// <summary>
    /// With about half of 10,000 bits set, selecting the i-th set bit must return
    /// the i-th recorded position.
    /// </summary>
    [TestMethod]
    public void TestDenseBitSet()
    {
        BitSet bitSet = CreateRandomBitSet(10000, 0.5, 42, out List<int> positions);

        DArray dArray = DArray.Build(bitSet, select1: true);

        AssertSelectMatchesPositions(dArray, bitSet, positions);
    }

    /// <summary>
    /// With about 1% of 100,000 bits set, selecting the i-th set bit must return
    /// the i-th recorded position.
    /// </summary>
    [TestMethod]
    public void TestSparseBitSet()
    {
        BitSet bitSet = CreateRandomBitSet(100000, 0.01, 42, out List<int> positions);

        DArray dArray = DArray.Build(bitSet, select1: true);

        AssertSelectMatchesPositions(dArray, bitSet, positions);
    }

    /// <summary>
    /// Selecting clear bits: with only bits 10, 20 and 30 set in a 1000-bit set, the
    /// zeros sit at 0..9, 11..19, 21..29 and 31..999, so the zero-based rank of a zero
    /// shifts by one each time a set bit is skipped.
    /// </summary>
    [TestMethod]
    public void TestSelect0()
    {
        int n = 1000;
        BitSet bitSet = new BitSet(n);

        // Bits default to 0; set three of them so select0 has gaps to skip.
        bitSet.Set(10);
        bitSet.Set(20);
        bitSet.Set(30);

        DArray dArray = DArray.Build(bitSet, select1: false);

        // 0th zero is at 0
        Assert.AreEqual(0, dArray.Select(bitSet, 0));
        // 9th zero is at 9
        Assert.AreEqual(9, dArray.Select(bitSet, 9));
        // 10th zero is at 11 (since 10 is set)
        Assert.AreEqual(11, dArray.Select(bitSet, 10));
        // 18th zero is at 19, the last zero before bit 20
        Assert.AreEqual(19, dArray.Select(bitSet, 18));
        // 19th zero is at 21 (since 20 is set)
        Assert.AreEqual(21, dArray.Select(bitSet, 19));
        // 27th zero is at 29, the last zero before bit 30
        Assert.AreEqual(29, dArray.Select(bitSet, 27));
        // 28th zero is at 31 (since 30 is set)
        Assert.AreEqual(31, dArray.Select(bitSet, 28));
    }

    /// <summary>
    /// A <see cref="DArray"/> written with <c>Write</c> and loaded with <c>Read</c> must
    /// answer select queries exactly like the original (sampled every 100th rank).
    /// </summary>
    [TestMethod]
    public void TestSerialization()
    {
        BitSet bitSet = CreateRandomBitSet(10000, 0.5, 123, out _);

        DArray original = DArray.Build(bitSet, select1: true);

        using MemoryStream ms = new MemoryStream();
        using BinaryWriter writer = new BinaryWriter(ms);
        original.Write(writer);
        // Make sure everything reached the stream before rewinding it.
        writer.Flush();

        ms.Position = 0;
        using BinaryReader reader = new BinaryReader(ms);
        DArray loaded = DArray.Read(reader, select1: true);

        // Test a few select operations
        int count = bitSet.PopCount();
        for (int i = 0; i < count; i += 100)
        {
            Assert.AreEqual(original.Select(bitSet, i), loaded.Select(bitSet, i), $"Failed at index {i}");
        }
    }

    /// <summary>
    /// Builds an <paramref name="n"/>-bit set in which each bit is set when the next
    /// draw from a <see cref="Random"/> seeded with <paramref name="seed"/> is below
    /// <paramref name="density"/>. The indices of the set bits are returned, in
    /// ascending order, through <paramref name="positions"/>.
    /// </summary>
    private static BitSet CreateRandomBitSet(int n, double density, int seed, out List<int> positions)
    {
        BitSet bitSet = new BitSet(n);
        positions = [];

        // Fixed seed keeps the generated layout reproducible between runs.
        Random r = new Random(seed);
        for (int i = 0; i < n; i++)
        {
            if (r.NextDouble() < density)
            {
                bitSet.Set(i);
                positions.Add(i);
            }
        }

        return bitSet;
    }

    /// <summary>
    /// Asserts that selecting rank <c>i</c> returns <c>positions[i]</c> for every
    /// recorded position, reporting the failing rank.
    /// </summary>
    private static void AssertSelectMatchesPositions(DArray dArray, BitSet bitSet, List<int> positions)
    {
        for (int i = 0; i < positions.Count; i++)
        {
            long pos = dArray.Select(bitSet, i);
            Assert.AreEqual(positions[i], pos, $"Failed at index {i}");
        }
    }
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
using System;
2+
using System.IO;
3+
using Infidex.Indexing.Compression;
4+
using Microsoft.VisualStudio.TestTools.UnitTesting;
5+
6+
namespace Infidex.Tests;
7+
8+
/// <summary>
/// Unit tests for <see cref="EliasFano"/>: encoding increasing sequences, reading
/// elements back with <c>Get</c>, and a <c>Write</c>/<c>Read</c> round trip.
/// </summary>
[TestClass]
public class EliasFanoTests
{
    /// <summary>
    /// A short increasing sequence must report its length and return every element unchanged.
    /// </summary>
    [TestMethod]
    public void TestEncodeDecode()
    {
        long[] values = [1, 5, 10, 100, 1000, 1234, 5000];

        EliasFano ef = EliasFano.Encode(values);

        Assert.AreEqual(values.Length, ef.Count);
        AssertDecodesTo(values, ef);
    }

    /// <summary>
    /// Encoding an empty sequence yields a count of 0.
    /// </summary>
    [TestMethod]
    public void TestEmpty()
    {
        long[] values = [];

        EliasFano ef = EliasFano.Encode(values);

        Assert.AreEqual(0, ef.Count);
    }

    /// <summary>
    /// 10,000 strictly increasing values with pseudo-random gaps of 1..49 must all
    /// be returned unchanged.
    /// </summary>
    [TestMethod]
    public void TestRandomData()
    {
        int n = 10000;
        long[] values = new long[n];

        // Fixed seed keeps the generated sequence reproducible between runs.
        Random r = new Random(12345);
        long current = 0;
        for (int i = 0; i < n; i++)
        {
            current += r.Next(1, 50); // Strictly increasing
            values[i] = current;
        }

        EliasFano ef = EliasFano.Encode(values);

        Assert.AreEqual(n, ef.Count);
        AssertDecodesTo(values, ef);
    }

    /// <summary>
    /// A sequence written with <c>Write</c> and loaded with <c>Read</c> must report the
    /// same count and return the same elements as the original.
    /// </summary>
    [TestMethod]
    public void TestSerialization()
    {
        long[] values = [1, 5, 10, 100, 1000, 1234, 5000];
        EliasFano original = EliasFano.Encode(values);

        using MemoryStream ms = new MemoryStream();
        using BinaryWriter writer = new BinaryWriter(ms);
        original.Write(writer);
        // Make sure everything reached the stream before rewinding it.
        writer.Flush();

        ms.Position = 0;
        using BinaryReader reader = new BinaryReader(ms);
        EliasFano loaded = EliasFano.Read(reader);

        Assert.AreEqual(original.Count, loaded.Count);

        for (int i = 0; i < values.Length; i++)
        {
            Assert.AreEqual(original.Get(i), loaded.Get(i), $"Failed at index {i}");
        }
    }

    /// <summary>
    /// Asserts that <paramref name="ef"/> returns <c>expected[i]</c> for every index,
    /// reporting the failing index.
    /// </summary>
    private static void AssertDecodesTo(long[] expected, EliasFano ef)
    {
        for (int i = 0; i < expected.Length; i++)
        {
            Assert.AreEqual(expected[i], ef.Get(i), $"Failed at index {i}");
        }
    }
}

0 commit comments

Comments
 (0)