Skip to content

Commit 1eecd32

Browse files
committed
"sample" renamed to "pattern" (it seems to be a better name);
added size-testing code for patterned compression; added documentation
1 parent 5f23abe commit 1eecd32

19 files changed

+502
-302
lines changed

Blazer.Benchmark/Blazer.Benchmark.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
<Reference Include="System.Xml" />
5959
</ItemGroup>
6060
<ItemGroup>
61+
<Compile Include="MessagesDto\LogMessage.cs" />
6162
<Compile Include="Program.cs" />
6263
<Compile Include="Properties\AssemblyInfo.cs" />
6364
<Compile Include="QuickLZ\QuickLZ.cs" />
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Xml.Serialization;
6+
7+
namespace Force.Blazer.Benchmark.MessagesDto
8+
{
9+
/// <summary>
/// Synthetic log record used as input data for the patterned-compression size benchmark.
/// Instances are XML-serialized; the public properties below are the serialized payload,
/// so their declaration order is part of the generated output.
/// </summary>
public class LogMessage
{
    // Levels a random message can carry; GenerateBestPattern concatenates them in this order.
    private static readonly string[] _levels = { "DEBUG", "INFO", "WARN", "ERROR", "FATAL" };

    // Vocabulary of pseudo-random lowercase words, built once in the static constructor.
    private static readonly string[] _words;

    public DateTime EventDate { get; set; }

    public string Level { get; set; }

    public string UserName { get; set; }

    public int ProcessingTime { get; set; }

    public string Message { get; set; }

    /// <summary>
    /// Builds the shared vocabulary: 1000 words of 1..6 lowercase letters from a fixed seed.
    /// </summary>
    static LogMessage()
    {
        var r = new Random(124);
        _words =
            Enumerable.Range(0, 1000)
                .Select(_ => new string(Enumerable.Range(0, r.Next(6) + 1).Select(x => (char)(r.Next(26) + 'a')).ToArray()))
                .ToArray();
    }

    /// <summary>
    /// Parameterless constructor (required by <see cref="XmlSerializer"/> and used by object initializers).
    /// </summary>
    public LogMessage()
    {
    }

    /// <summary>
    /// Creates a message whose fields are drawn from <paramref name="r"/>.
    /// </summary>
    /// <param name="r">Random source; the sequence of calls made on it determines the message.</param>
    public LogMessage(Random r)
    {
        // NOTE: the order of the r.Next calls defines the generated data set; do not reorder.
        EventDate = new DateTime(2016, 1, 1).AddSeconds(r.Next(60 * 60 * 24 * 365));
        Level = _levels[r.Next(_levels.Length)];
        UserName = r.Next(2) == 0 ? "System" : _words[r.Next(_words.Length)];
        ProcessingTime = r.Next(1000);

        // 3..12 words joined with spaces; the word count is drawn before any of the words.
        Message = string.Join(" ", Enumerable.Range(0, r.Next(10) + 3).Select(x => _words[r.Next(_words.Length)]));
    }

    /// <summary>
    /// Generates <paramref name="count"/> messages from a fixed seed and returns each one XML-serialized.
    /// </summary>
    /// <param name="count">Number of messages to generate; zero or a negative value yields an empty array.</param>
    /// <returns>One serialized byte array per message, in generation order.</returns>
    public static byte[][] Generate(int count)
    {
        // fixed seed, so every run produces the same data
        var random = new Random(124);
        var serializer = new XmlSerializer(typeof(LogMessage));
        var result = new List<byte[]>();
        for (var i = 0; i < count; i++)
        {
            result.Add(Serialize(serializer, new LogMessage(random)));
        }

        return result.ToArray();
    }

    /// <summary>
    /// Builds a serialized message that contains every level name and every vocabulary word,
    /// intended to be used as a compression pattern.
    /// </summary>
    /// <returns>The XML-serialized pattern message.</returns>
    public static byte[] GenerateBestPattern()
    {
        var pattern = new LogMessage
        {
            EventDate = new DateTime(2016, 1, 1),
            Level = string.Concat(_levels),
            UserName = "System",
            Message = string.Join(string.Empty, _words)
        };

        return Serialize(new XmlSerializer(typeof(LogMessage)), pattern);
    }

    // Serializes one message into a fresh byte array, disposing the temporary stream.
    private static byte[] Serialize(XmlSerializer serializer, LogMessage message)
    {
        using (var stream = new MemoryStream())
        {
            serializer.Serialize(stream, message);
            return stream.ToArray();
        }
    }
}
81+
}

Blazer.Benchmark/Program.cs

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
using System.IO;
44
using System.IO.Compression;
55
using System.Linq;
6+
using System.Text;
67

8+
using Force.Blazer.Algorithms;
79
using Force.Blazer.Algorithms.Crc32C;
10+
using Force.Blazer.Benchmark.MessagesDto;
811
using Force.Blazer.Native;
912

1013
using ICSharpCode.SharpZipLib.BZip2;
1114
using ICSharpCode.SharpZipLib.GZip;
15+
using ICSharpCode.SharpZipLib.Zip.Compression;
1216

1317
using LZ4;
1418

@@ -20,9 +24,10 @@ public class Program
2024
{
2125
public static void Main()
2226
{
23-
BenchCrc32C();
27+
BenchPatternedCompression();
28+
// BenchCrc32C();
2429
//BenchNoCompression();
25-
BenchFile("Selesia Total", @"..\..\..\TestFiles\Silesia\ztotal.tar");
30+
// BenchFile("Selesia Total", @"..\..\..\TestFiles\Silesia\ztotal.tar");
2631
// BenchFile("Log", @"..\..\..\TestFiles\Service.2016-05-01.log");
2732
// BenchFile("AdventureWorks (high compressible db)", @"..\..\..\TestFiles\AdventureWorks2012_Data.mdf");
2833
// BenchFile("enwiki8 (big text document)", @"..\..\..\TestFiles\enwik8");
@@ -108,9 +113,9 @@ private static void BenchData(string title, byte[] array)
108113
DoBench("Snappy ", array, x => new SnappyStream(x, CompressionMode.Compress), x => new SnappyStream(x, CompressionMode.Decompress));
109114
DoBench("StdGZip ", array, x => new GZipStream(x, CompressionMode.Compress), x => new GZipStream(x, CompressionMode.Decompress));
110115
// very slow for usual running
111-
DoBench("BZip2 ", array, x => new BZip2OutputStream(x), x => new BZip2InputStream(x));
112-
DoBenchQuickLZ("QuickLZ/1", 1, array);
113-
DoBenchQuickLZ("QuickLZ/3", 3, array);
116+
// DoBench("BZip2 ", array, x => new BZip2OutputStream(x), x => new BZip2InputStream(x));
117+
// DoBenchQuickLZ("QuickLZ/1", 1, array);
118+
// DoBenchQuickLZ("QuickLZ/3", 3, array);
114119
}
115120

116121
private static void DoBench(string title, byte[] data, Func<Stream, Stream> createCompressionStream, Func<Stream, Stream> createDecompressionStream)
@@ -222,5 +227,52 @@ private static void DoBenchBlock(
222227
100.0 * comprArray.Length / data.Length,
223228
1.0 * data.Length / comprArray.Length);
224229
}
230+
231+
/// <summary>
/// Compresses 10000 generated log messages one by one with several compressors and prints
/// the summed compressed size of each, plus its percentage of the uncompressed total.
/// The Blazer patterned streams are prepared once (from the first message, and from
/// <c>LogMessage.GenerateBestPattern()</c>) and then reused for every message.
/// </summary>
private static void BenchPatternedCompression()
{
    var data = LogMessage.Generate(10000);
    var totalSize = data.Sum(x => x.Length);

    // Only the produced byte count matters, so one scratch output buffer is reused for all messages.
    var deflateBuffer = new byte[4096];
    var gzipSize = data.Sum(x =>
    {
        var deflater = new Deflater();
        deflater.SetInput(x);
        deflater.Finish();
        var cnt = 0;

        // Drain until the deflater reports the stream is complete. Checking IsNeedingInput
        // instead would stop as soon as the input is consumed, even if output is still pending.
        while (!deflater.IsFinished)
        {
            cnt += deflater.Deflate(deflateBuffer);
        }

        return cnt;
    });

    var quickLzSize = data.Sum(x => QuickLZ.QuickLZ.compress(x, 1).Length);

    var blStreamIndependent = data.Sum(x => StreamEncoder.CompressData(x).Length);

    // pattern taken from the first generated message
    var ps = BlazerPatternedHelper.CreateStream();
    ps.PreparePattern(data[0]);

    // pattern built by LogMessage.GenerateBestPattern()
    var psbest = BlazerPatternedHelper.CreateStream();
    psbest.PreparePattern(LogMessage.GenerateBestPattern());
    // var psh = BlazerPatternedHelper.CreateStreamHigh();
    // psh.PreparePattern(data[0]);
    // var pb = BlazerPatternedHelper.CreateBlock();
    // pb.PreparePattern(data[0]);

    var blSPatterned = data.Sum(x => ps.EncodeWithPattern(x).Length);
    var blSBestPatterned = data.Sum(x => psbest.EncodeWithPattern(x).Length);
    // var blSHPatterned = data.Sum(x => psh.EncodeWithPattern(x).Length);
    // var blBPatterned = data.Sum(x => pb.EncodeWithPattern(x).Length);

    // show one sample message so the reader can see what is being compressed
    Console.WriteLine(Encoding.UTF8.GetString(data[0]));
    Console.WriteLine();
    Console.WriteLine("Total: {0}", totalSize);
    Console.WriteLine("GZip (Deflate): {0}\t{1:0.000}", gzipSize, 100.0 * gzipSize / totalSize);
    Console.WriteLine("QuickLZ: {0}\t{1:0.000}", quickLzSize, 100.0 * quickLzSize / totalSize);
    Console.WriteLine("Blazer Independent: {0}\t{1:0.000}", blStreamIndependent, 100.0 * blStreamIndependent / totalSize);
    Console.WriteLine("Blazer Pattern: {0}\t{1:0.000}", blSPatterned, 100.0 * blSPatterned / totalSize);
    Console.WriteLine("Blazer Pattern Best: {0}\t{1:0.000}", blSBestPatterned, 100.0 * blSBestPatterned / totalSize);
    // Console.WriteLine("Blazer Pattern SH: {0}\t{1:0.000}", blSHPatterned, 100.0 * blSHPatterned / totalSize);
    // Console.WriteLine("Blazer Pattern B: {0}\t{1:0.000}", blBPatterned, 100.0 * blBPatterned / totalSize);
}
225277
}
226278
}

Blazer.Native/build.cmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ C:\Windows\Microsoft.NET\Framework64\v4.0.30319\MSBuild.exe /t:Rebuild /p:Config
33
xcopy /y Release\Blazer.Native.x86.dll ..\Blazer.Native.Build\
44
xcopy /y Release\Blazer.Native.x64.dll ..\Blazer.Native.Build\
55

6-
del /s /q %temp%\Blazer.Net.0.8.1.7
6+
del /s /q %temp%\Blazer.Net.0.8.2.8

Blazer.Net.Tests/Blazer.Net.Tests.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
<Compile Include="IntegrityTests.cs" />
5555
<Compile Include="OptionsTests.cs" />
5656
<Compile Include="Properties\AssemblyInfo.cs" />
57-
<Compile Include="SampledCompressionTests.cs" />
57+
<Compile Include="PatternedCompressionTests.cs" />
5858
<Compile Include="StreamEncoderTests.cs" />
5959
</ItemGroup>
6060
<ItemGroup>
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
using System;
2+
using System.Linq;
3+
4+
using Force.Blazer.Algorithms;
5+
using Force.Blazer.Algorithms.Patterned;
6+
7+
using NUnit.Framework;
8+
9+
namespace Blazer.Net.Tests
10+
{
11+
/// <summary>
/// Round-trip tests for the patterned compressors. The fixture is instantiated once per
/// compressor implementation; the fixture argument selects which type is under test.
/// </summary>
[TestFixture(1)]
[TestFixture(2)]
[TestFixture(3)]
public class PatternedCompressionTests
{
    private readonly Type _compressorType;

    /// <summary>
    /// Maps the fixture argument to the compressor type to test.
    /// </summary>
    /// <param name="type">1 = stream, 2 = stream high, 3 = block compressor.</param>
    public PatternedCompressionTests(int type)
    {
        if (type == 1) _compressorType = typeof(StreamPatternedCompressor);
        else if (type == 2) _compressorType = typeof(StreamHighPatternedCompressor);
        else if (type == 3) _compressorType = typeof(BlockPatternedCompressor);
        else throw new NotImplementedException();
    }

    // Creates a fresh compressor of the type selected for this fixture.
    private BasePatternedCompressor GetCompressor()
    {
        return (BasePatternedCompressor)Activator.CreateInstance(_compressorType);
    }

    /// <summary>
    /// Data encoded with a pattern must be smaller than the same data compressed independently,
    /// must decode back to the original, and the compressor must be reusable for a second buffer.
    /// </summary>
    [Test]
    public void DataWithPattern_Should_Be_Correctly_Encoded_Decoded()
    {
        var ssed = GetCompressor();
        var pattern = new byte[100];
        pattern[1] = 42;
        ssed.PreparePattern(pattern, 0, pattern.Length);

        var data1 = new byte[10];
        data1[0] = 42;

        var data2 = new byte[10];
        data2[0] = 12;

        var tmpOut = new byte[ssed.CalculateMaxCompressedBufferLength(10)];
        var tmpOut2 = new byte[ssed.CalculateMaxCompressedBufferLength(10)];

        var cntPatterned = ssed.EncodeWithPattern(data1, 0, data1.Length, tmpOut, 0);
        var cntUnpatterned = StreamEncoder.CompressData(data1);
        Assert.That(cntPatterned, Is.LessThan(cntUnpatterned.Length));
        var cntPatternUnpacked = ssed.DecodeWithPattern(tmpOut, 0, cntPatterned, tmpOut2, 0);
        Assert.That(cntPatternUnpacked, Is.EqualTo(10));
        CollectionAssert.AreEqual(data1, tmpOut2.Take(cntPatternUnpacked));

        // checking that we can repeat without failure
        cntPatterned = ssed.EncodeWithPattern(data2, 0, data2.Length, tmpOut, 0);
        cntPatternUnpacked = ssed.DecodeWithPattern(tmpOut, 0, cntPatterned, tmpOut2, 0);
        Assert.That(cntPatternUnpacked, Is.EqualTo(10));
        CollectionAssert.AreEqual(data2, tmpOut2.Take(cntPatternUnpacked));
    }

    /// <summary>
    /// Encoding a slice of a buffer at non-zero input/output offsets must produce the same
    /// compressed length as encoding an equal standalone buffer, and must decode correctly.
    /// </summary>
    [Test]
    public void DataWithPattern_Should_Be_Correctly_Encoded_Decoded_With_Offsets()
    {
        var ssed = GetCompressor();
        var pattern = new byte[100];
        pattern[1] = 42;
        ssed.PreparePattern(pattern, 1, pattern.Length - 1);

        var data1 = new byte[10];
        data1[1] = 42;

        // data2 holds the same 9 bytes as data1[1..9], so both encodings are comparable
        var data2 = new byte[9];
        data2[0] = 42;

        var tmpOut = new byte[ssed.CalculateMaxCompressedBufferLength(10)];
        var tmpOut2 = new byte[ssed.CalculateMaxCompressedBufferLength(10)];

        var cntPatterned1 = ssed.EncodeWithPattern(data1, 1, data1.Length - 1, tmpOut, 1);
        var cntPatterned2 = ssed.EncodeWithPattern(data2, 0, data2.Length, tmpOut2, 0);
        Assert.That(cntPatterned1, Is.EqualTo(cntPatterned2));

        // tmpOut2 is reused as the decode target; only the encoded length above was needed from it
        var cntPatternedUnpacked = ssed.DecodeWithPattern(tmpOut, 1, cntPatterned1, tmpOut2, 1);
        Assert.That(cntPatternedUnpacked, Is.EqualTo(9));
        CollectionAssert.AreEqual(data1.Skip(1), tmpOut2.Skip(1).Take(cntPatternedUnpacked));
    }

    /// <summary>
    /// The array-in/array-out overloads must round-trip data without explicit buffers.
    /// </summary>
    [Test]
    public void DataWithPattern_Should_Be_Correctly_Encoded_Decoded_With_Simple_Interface()
    {
        var ssed = GetCompressor();
        var pattern = new byte[100];
        pattern[1] = 42;
        ssed.PreparePattern(pattern, 1, pattern.Length - 1);

        var data1 = new byte[10];
        data1[1] = 42;

        var patterned = ssed.EncodeWithPattern(data1);
        var patternedUnpacked = ssed.DecodeWithPattern(patterned);

        CollectionAssert.AreEqual(data1, patternedUnpacked);
    }

    /// <summary>
    /// Empty input must encode to exactly one byte and decode back to an empty array.
    /// </summary>
    [Test]
    public void Zero_Length_Should_not_Cause_Error()
    {
        var ssed = GetCompressor();
        var pattern = new byte[100];
        pattern[1] = 42;
        ssed.PreparePattern(pattern, 1, pattern.Length - 1);

        var data1 = new byte[0];

        var patterned = ssed.EncodeWithPattern(data1);
        Assert.That(patterned.Length, Is.EqualTo(1));
        var patternUnpacked = ssed.DecodeWithPattern(patterned);

        CollectionAssert.AreEqual(data1, patternUnpacked);
    }
}
122+
}

0 commit comments

Comments
 (0)