Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Equativ.RoaringBitmaps.Benchmarks/CRoaringBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
using Equativ.RoaringBitmaps.Datasets;
using Roaring.Net.CRoaring;

namespace Equativ.RoaringBitmaps.Benchmark;
namespace Equativ.RoaringBitmaps.Benchmarks;

// Ran on a MacBook Pro M1.
// ⚠️ Unmanaged allocations made by CRoaring are not tracked by the memory diagnoser.
Expand Down
43 changes: 43 additions & 0 deletions Equativ.RoaringBitmaps.Benchmarks/CreateBenchmark.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
using Equativ.RoaringBitmaps.Datasets;

namespace Equativ.RoaringBitmaps.Benchmarks;

/// <summary>
/// Benchmarks <see cref="RoaringBitmap.Create"/> by building one bitmap from each raw value
/// list of the selected dataset.
/// </summary>
[MemoryDiagnoser(false)]
public class CreateBenchmark
{
    // Keeps the created bitmaps observable so the JIT cannot eliminate the calls as dead code.
    private readonly Consumer _consumer = new();

    // Value lists of the current dataset; replaced in Setup before any benchmark runs.
    private List<int>[] _values = Array.Empty<List<int>>();

    /// <summary>
    /// Dataset archive to load; the benchmark is executed once per listed value.
    /// </summary>
    // NOTE(review): the list previously contained Paths.Census1881Srt twice. The duplicate was
    // removed; Paths.CensusIncomeSrt was presumably intended — confirm that the constant exists
    // in Paths before adding it here.
    [Params(
        Paths.Census1881,
        Paths.Census1881Srt,
        Paths.CensusIncome,
        Paths.Dimension003,
        Paths.Dimension008,
        Paths.Dimension033,
        Paths.UsCensus2000,
        Paths.WeatherSept85,
        Paths.WeatherSept85Srt,
        Paths.WikileaksNoQuotes,
        Paths.WikileaksNoQuotesSrt)]
    public string FileName { get; set; } = string.Empty;

    /// <summary>
    /// Loads every value list of <see cref="FileName"/> into memory so that reading and parsing
    /// the archive is not part of the measured work.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        using var provider = new ZipRealDataProvider(FileName);
        _values = provider.EnumerateValues().ToArray();
    }

    /// <summary>
    /// Creates a bitmap from every value list of the dataset.
    /// </summary>
    [Benchmark]
    public void Create()
    {
        // Each iteration only reads the current list, so the whole array is visited. The former
        // bound of Length - 1 silently skipped the last list.
        foreach (var values in _values)
        {
            _consumer.Consume(RoaringBitmap.Create(values));
        }
    }
}
2 changes: 1 addition & 1 deletion Equativ.RoaringBitmaps.Benchmarks/DatasetsBenchmark.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using BenchmarkDotNet.Attributes;
using Equativ.RoaringBitmaps.Datasets;

namespace Equativ.RoaringBitmaps.Benchmark;
namespace Equativ.RoaringBitmaps.Benchmarks;

[MemoryDiagnoser(false)]
public class DatasetsBenchmark
Expand Down
2 changes: 1 addition & 1 deletion Equativ.RoaringBitmaps.Benchmarks/PopcntBenchmark.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using BenchmarkDotNet.Attributes;

namespace Equativ.RoaringBitmaps.Benchmark;
namespace Equativ.RoaringBitmaps.Benchmarks;

public class PopcntBenchmark
{
Expand Down
2 changes: 1 addition & 1 deletion Equativ.RoaringBitmaps.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using BenchmarkDotNet.Running;
using Equativ.RoaringBitmaps.Datasets;

namespace Equativ.RoaringBitmaps.Benchmark;
namespace Equativ.RoaringBitmaps.Benchmarks;

internal class Program
{
Expand Down
16 changes: 16 additions & 0 deletions Equativ.RoaringBitmaps.Datasets/ZipRealDataProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
{
using (var stringReader = new StreamReader(stream))
{
var split = stringReader.ReadLine().Split(',');

Check warning on line 30 in Equativ.RoaringBitmaps.Datasets/ZipRealDataProvider.cs

View workflow job for this annotation

GitHub Actions / build

Dereference of a possibly null reference.
var values = split.Select(int.Parse).ToList();
var bitmap = RoaringBitmap.Create(values);
yield return bitmap.Optimize();
Expand All @@ -35,6 +35,22 @@
}
}
}

/// <summary>
/// Lazily reads each entry of the archive and yields the integers found on its first line.
/// </summary>
/// <returns>
/// One list per entry, holding the comma-separated integers of the entry's first line in the
/// order they appear. Entries that contain no line at all are skipped.
/// </returns>
/// <exception cref="FormatException">A token on the line is not a valid integer.</exception>
/// <exception cref="OverflowException">A token does not fit into an <see cref="int"/>.</exception>
public IEnumerable<List<int>> EnumerateValues()
{
    foreach (var zipArchiveEntry in _mArchive.Entries)
    {
        // Both are disposed at the end of each iteration, i.e. once the consumer asks for the
        // next list.
        using var stream = zipArchiveEntry.Open();
        using var stringReader = new StreamReader(stream);

        // ReadLine returns null for an empty entry; skip it instead of dereferencing null.
        var line = stringReader.ReadLine();
        if (line is null)
        {
            continue;
        }

        yield return line.Split(',').Select(int.Parse).ToList();
    }
}

IEnumerator IEnumerable.GetEnumerator()
{
Expand Down
2 changes: 2 additions & 0 deletions Equativ.RoaringBitmaps.Tests/RoaringBitmapTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using Xunit;
using System.Linq;
using System.Reflection;

namespace Equativ.RoaringBitmaps.Tests;

Expand Down
49 changes: 38 additions & 11 deletions Equativ.RoaringBitmaps/RoaringBitmap.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,25 +88,52 @@ public RoaringBitmap Optimize()
/// <returns>RoaringBitmap</returns>
public static RoaringBitmap Create(IEnumerable<int> values)
{
    // Always work on a private copy: ToArray() allocates a new array even when the caller
    // passes an int[], so the in-place sort and deduplication below never reorder or
    // overwrite data owned by the caller.
    var data = values.ToArray();
    if (data.Length == 0)
    {
        return new RoaringBitmap(new RoaringArray(0, new List<ushort>(), new List<Container>()));
    }

    Array.Sort(data);

    // In-place deduplication (two pointers technique): afterwards data[0..uniqueCount)
    // holds the distinct values in ascending signed order.
    var uniqueCount = 1;
    for (var i = 1; i < data.Length; i++)
    {
        if (data[i] != data[uniqueCount - 1])
        {
            data[uniqueCount++] = data[i];
        }
    }

    // Array.Sort orders ints as signed numbers, so negative values come first. Keys are
    // stored as ushort; assuming Utils.HighBits returns the upper 16 bits of the value,
    // negative values map to the largest keys. Emitting the non-negative range first keeps
    // the keys ascending, matching the earlier LINQ implementation, which ordered the
    // groups by key explicitly.
    var firstNonNegative = Array.BinarySearch(data, 0, uniqueCount, 0);
    if (firstNonNegative < 0)
    {
        // Zero is absent: the complement is the index of the first value greater than zero.
        firstNonNegative = ~firstNonNegative;
    }

    var keys = new List<ushort>();
    var containers = new List<Container>();

    AppendContainers(firstNonNegative, uniqueCount);
    AppendContainers(0, firstNonNegative);

    return new RoaringBitmap(new RoaringArray(keys.Count, keys, containers));

    // Splits data[from..to) into runs that share the same high bits and appends one key and
    // one container per run.
    void AppendContainers(int from, int to)
    {
        var index = from;
        while (index < to)
        {
            var hb = Utils.HighBits(data[index]);
            var start = index;
            index++;
            while (index < to && Utils.HighBits(data[index]) == hb)
            {
                index++;
            }

            var count = index - start;
            var lows = new ushort[count];
            for (var j = 0; j < count; j++)
            {
                lows[j] = Utils.LowBits(data[start + j]);
            }

            keys.Add(hb);

            // Runs larger than Container.MaxSize are stored as a bitmap container, smaller
            // ones as an array container.
            containers.Add(count > Container.MaxSize
                ? BitmapContainer.Create(lows)
                : ArrayContainer.Create(lows));
        }
    }
}

/// <summary>
Expand Down
Loading