Skip to content

Commit 26d1d46

Browse files
mitikov and ogxd authored
Optimize Create (no Linq) (#10)
* Optimize Create and add tests
* Add benchmark on Create method
* Remove old Create methods

---------

Co-authored-by: Olivier Giniaux <[email protected]>
1 parent ea76b7d commit 26d1d46

File tree

8 files changed

+103
-15
lines changed

8 files changed

+103
-15
lines changed

Equativ.RoaringBitmaps.Benchmarks/CRoaringBenchmark.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
using Equativ.RoaringBitmaps.Datasets;
44
using Roaring.Net.CRoaring;
55

6-
namespace Equativ.RoaringBitmaps.Benchmark;
6+
namespace Equativ.RoaringBitmaps.Benchmarks;
77

88
// Ran on a MacBook Pro M1
99
// ⚠️ Unmanaged allocations happening in CRoaring are not tracked by the memory diagnoser.
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
using BenchmarkDotNet.Attributes;
2+
using BenchmarkDotNet.Engines;
3+
using Equativ.RoaringBitmaps.Datasets;
4+
5+
namespace Equativ.RoaringBitmaps.Benchmarks;
6+
7+
/// <summary>
/// Benchmarks <c>RoaringBitmap.Create</c> on the raw integer lists stored in the
/// real-world dataset archives, one archive per <see cref="FileName"/> parameter value.
/// </summary>
[MemoryDiagnoser(false)]
public class CreateBenchmark
{
    // One list of raw values per archive entry; filled by Setup() before the benchmark runs.
    private List<int>[] _values;

    // Receives every created bitmap so the result of Create is observably used.
    private readonly Consumer _consumer = new();

    /// <summary>
    /// Dataset archive to load. Each value yields its own benchmark case.
    /// </summary>
    // NOTE(review): the list previously contained Paths.Census1881Srt twice; the sorted
    // CensusIncome variant was the only dataset without its "Srt" counterpart, so it is
    // presumed to be the intended entry. Confirm Paths.CensusIncomeSrt exists.
    [Params(
        Paths.Census1881,
        Paths.Census1881Srt,
        Paths.CensusIncome,
        Paths.CensusIncomeSrt,
        Paths.Dimension003,
        Paths.Dimension008,
        Paths.Dimension033,
        Paths.UsCensus2000,
        Paths.WeatherSept85,
        Paths.WeatherSept85Srt,
        Paths.WikileaksNoQuotes,
        Paths.WikileaksNoQuotesSrt)]
    public string FileName { get; set; }

    /// <summary>
    /// Reads and parses the whole archive once, so that file I/O and text parsing
    /// happen outside the measured method.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        using var provider = new ZipRealDataProvider(FileName);
        _values = provider.EnumerateValues().ToArray();
    }

    /// <summary>
    /// Builds one bitmap per value list of the loaded dataset.
    /// </summary>
    [Benchmark]
    public void Create()
    {
        // Visit every list: the previous "k < Length - 1" bound silently skipped the last
        // one (that bound only makes sense for benchmarks combining entries k and k + 1).
        foreach (var values in _values)
        {
            _consumer.Consume(RoaringBitmap.Create(values));
        }
    }
}

Equativ.RoaringBitmaps.Benchmarks/DatasetsBenchmark.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
using BenchmarkDotNet.Attributes;
22
using Equativ.RoaringBitmaps.Datasets;
33

4-
namespace Equativ.RoaringBitmaps.Benchmark;
4+
namespace Equativ.RoaringBitmaps.Benchmarks;
55

66
[MemoryDiagnoser(false)]
77
public class DatasetsBenchmark

Equativ.RoaringBitmaps.Benchmarks/PopcntBenchmark.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
using BenchmarkDotNet.Attributes;
22

3-
namespace Equativ.RoaringBitmaps.Benchmark;
3+
namespace Equativ.RoaringBitmaps.Benchmarks;
44

55
public class PopcntBenchmark
66
{

Equativ.RoaringBitmaps.Benchmarks/Program.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
using BenchmarkDotNet.Running;
22
using Equativ.RoaringBitmaps.Datasets;
33

4-
namespace Equativ.RoaringBitmaps.Benchmark;
4+
namespace Equativ.RoaringBitmaps.Benchmarks;
55

66
internal class Program
77
{

Equativ.RoaringBitmaps.Datasets/ZipRealDataProvider.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,22 @@ public IEnumerator<RoaringBitmap> GetEnumerator()
3535
}
3636
}
3737
}
38+
39+
/// <summary>
/// Lazily walks the entries of the archive and yields the integers stored in each one,
/// without building a bitmap from them.
/// </summary>
/// <returns>
/// One list per archive entry, parsed from the first line of the entry, which is read as
/// comma-separated integers. An entry without any data yields an empty list.
/// </returns>
/// <exception cref="FormatException">A token on the line is not a valid integer.</exception>
public IEnumerable<List<int>> EnumerateValues()
{
    foreach (var zipArchiveEntry in _mArchive.Entries)
    {
        using (var stream = zipArchiveEntry.Open())
        using (var stringReader = new StreamReader(stream))
        {
            // ReadLine returns null at end of stream (e.g. an empty or directory entry);
            // dereferencing it unconditionally used to throw a NullReferenceException.
            var line = stringReader.ReadLine();
            if (string.IsNullOrWhiteSpace(line))
            {
                yield return new List<int>();
                continue;
            }

            var split = line.Split(',');
            var values = new List<int>(split.Length);
            foreach (var token in split)
            {
                // The files are machine-written, so parse independently of the current culture.
                values.Add(int.Parse(token, System.Globalization.CultureInfo.InvariantCulture));
            }

            yield return values;
        }
    }
}
3854

3955
IEnumerator IEnumerable.GetEnumerator()
4056
{

Equativ.RoaringBitmaps.Tests/RoaringBitmapTests.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
using Xunit;
2+
using System.Linq;
3+
using System.Reflection;
24

35
namespace Equativ.RoaringBitmaps.Tests;
46

Equativ.RoaringBitmaps/RoaringBitmap.cs

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,25 +88,52 @@ public RoaringBitmap Optimize()
8888
/// <returns>RoaringBitmap</returns>
8989
public static RoaringBitmap Create(IEnumerable<int> values)
{
    // Always work on a private copy. Reusing a caller-supplied int[] directly would let the
    // sort and the in-place de-duplication below reorder and overwrite the caller's data.
    var data = values.ToArray();
    if (data.Length == 0)
    {
        return new RoaringBitmap(new RoaringArray(0, new List<ushort>(), new List<Container>()));
    }

    Array.Sort(data);

    // In-place de-duplication (two pointers): data[0..uniqueCount) ends up holding the
    // distinct values in ascending (signed) order.
    var uniqueCount = 1;
    for (var i = 1; i < data.Length; i++)
    {
        if (data[i] != data[uniqueCount - 1])
        {
            data[uniqueCount++] = data[i];
        }
    }

    // The signed sort places negative values first, whereas the LINQ implementation this
    // replaces emitted containers ordered by their ushort key. Assuming Utils.HighBits
    // returns the upper 16 bits of the value, negative values carry the largest keys, so
    // their range has to be emitted after the non-negative one to keep keys ascending.
    var firstNonNegative = 0;
    while (firstNonNegative < uniqueCount && data[firstNonNegative] < 0)
    {
        firstNonNegative++;
    }

    var keys = new List<ushort>();
    var containers = new List<Container>();
    AppendContainers(data, firstNonNegative, uniqueCount, keys, containers);
    AppendContainers(data, 0, firstNonNegative, keys, containers);

    return new RoaringBitmap(new RoaringArray(keys.Count, keys, containers));
}

// Splits the sorted, distinct values in data[from..to) into runs that share the same high
// bits and appends one key and one container per run.
private static void AppendContainers(int[] data, int from, int to, List<ushort> keys, List<Container> containers)
{
    var index = from;
    while (index < to)
    {
        var hb = Utils.HighBits(data[index]);
        var start = index;
        index++;
        while (index < to && Utils.HighBits(data[index]) == hb)
        {
            index++;
        }

        var count = index - start;
        var lows = new ushort[count];
        for (var j = 0; j < count; j++)
        {
            lows[j] = Utils.LowBits(data[start + j]);
        }

        keys.Add(hb);

        // Runs larger than Container.MaxSize are stored as a bitmap container,
        // smaller ones as an array container.
        containers.Add(count > Container.MaxSize
            ? BitmapContainer.Create(lows)
            : ArrayContainer.Create(lows));
    }
}
111138

112139
/// <summary>

0 commit comments

Comments
 (0)