Skip to content

Commit 11eaf7c

Browse files
committed
Implement gz compression
Add test for UncompressedDigest Fix #29
1 parent da83f2f commit 11eaf7c

File tree

4 files changed

+117
-24
lines changed

4 files changed

+117
-24
lines changed

Microsoft.NET.Build.Containers/Descriptor.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ public readonly record struct Descriptor
2929
[JsonPropertyName("digest")]
3030
public string Digest { get; init; }
3131

32+
/// <summary>
33+
/// Digest of the uncompressed content, specifying algorithm and value (for example <c>sha256:&lt;hex&gt;</c>). Marked <c>[JsonIgnore]</c>, so it is not written when the descriptor is serialized to JSON.
34+
/// </summary>
35+
/// <remarks>
36+
/// <see href="https://github.com/opencontainers/image-spec/blob/7b36cea86235157d78528944cb94c3323ee0905c/descriptor.md#digests"/>
37+
/// </remarks>
38+
[JsonIgnore]
39+
public string UncompressedDigest { get; init; }
40+
3241
/// <summary>
3342
/// Size, in bytes, of the raw content.
3443
/// </summary>

Microsoft.NET.Build.Containers/Image.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public void AddLayer(Layer l)
5656
{
5757
newLayers.Add(l);
5858
manifest["layers"]!.AsArray().Add(l.Descriptor);
59-
config["rootfs"]!["diff_ids"]!.AsArray().Add(l.Descriptor.Digest); // TODO: this should be the descriptor of the UNCOMPRESSED tarball (once we turn on compression)
59+
config["rootfs"]!["diff_ids"]!.AsArray().Add(l.Descriptor.UncompressedDigest);
6060
RecalculateDigest();
6161
}
6262

Microsoft.NET.Build.Containers/Layer.cs

Lines changed: 89 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System.Formats.Tar;
2+
using System.IO.Compression;
23
using System.Security.Cryptography;
34

45
namespace Microsoft.NET.Build.Containers;
@@ -26,37 +27,44 @@ public static Layer FromFiles(IEnumerable<(string path, string containerPath)> f
2627
{
2728
long fileSize;
2829
Span<byte> hash = stackalloc byte[SHA256.HashSizeInBytes];
30+
byte[] uncompressedHash;
2931

3032
string tempTarballPath = ContentStore.GetTempFile();
3133
using (FileStream fs = File.Create(tempTarballPath))
3234
{
33-
// using (GZipStream gz = new(fs, CompressionMode.Compress)) // TODO: https://github.com/dotnet/sdk-container-builds/issues/29
34-
using (TarWriter writer = new(fs, TarEntryFormat.Gnu, leaveOpen: true))
35+
using (HashDigestGZipStream gz = new(fs, leaveOpen: true))
3536
{
36-
foreach (var item in fileList)
37+
using (TarWriter writer = new(gz, TarEntryFormat.Gnu, leaveOpen: true))
3738
{
38-
// Docker treats a COPY instruction that copies to a path like `/app` by
39-
// including `app/` as a directory, with no leading slash. Emulate that here.
40-
string containerPath = item.containerPath.TrimStart(PathSeparators);
39+
foreach (var item in fileList)
40+
{
41+
// Docker treats a COPY instruction that copies to a path like `/app` by
42+
// including `app/` as a directory, with no leading slash. Emulate that here.
43+
string containerPath = item.containerPath.TrimStart(PathSeparators);
44+
45+
writer.WriteEntry(item.path, containerPath);
46+
}
47+
} // Dispose of the TarWriter before getting the hash so the final data get written to the tar stream
4148

42-
writer.WriteEntry(item.path, containerPath);
43-
}
49+
uncompressedHash = gz.GetHash();
4450
}
4551

4652
fileSize = fs.Length;
47-
53+
4854
fs.Position = 0;
4955

5056
SHA256.HashData(fs, hash);
5157
}
5258

5359
string contentHash = Convert.ToHexString(hash).ToLowerInvariant();
60+
string uncompressedContentHash = Convert.ToHexString(uncompressedHash).ToLowerInvariant();
5461

5562
Descriptor descriptor = new()
5663
{
57-
MediaType = "application/vnd.docker.image.rootfs.diff.tar", // TODO: configurable? gzip always?
64+
MediaType = "application/vnd.docker.image.rootfs.diff.tar.gzip", // TODO: configurable? gzip always?
5865
Size = fileSize,
59-
Digest = $"sha256:{contentHash}"
66+
Digest = $"sha256:{contentHash}",
67+
UncompressedDigest = $"sha256:{uncompressedContentHash}",
6068
};
6169

6270
string storedContent = ContentStore.PathForDescriptor(descriptor);
@@ -76,4 +84,74 @@ public static Layer FromFiles(IEnumerable<(string path, string containerPath)> f
7684

7785
private readonly static char[] PathSeparators = new char[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };
7886

87+
/// <summary>
/// A write-only stream that computes the SHA-256 digest of the raw (uncompressed) bytes written to it
/// while also gzip-compressing those same bytes into the wrapped stream.
/// </summary>
/// <remarks>
/// Reading, seeking and length/position queries are not supported and throw <see cref="NotSupportedException"/>.
/// Asynchronous writes are intentionally not implemented.
/// </remarks>
private sealed class HashDigestGZipStream : Stream
{
    private readonly SHA256 hashAlgorithm;
    private readonly CryptoStream sha256Stream;
    private readonly Stream compressionStream;

    /// <summary>
    /// Creates a stream that compresses into <paramref name="writeStream"/> and hashes the uncompressed input.
    /// </summary>
    /// <param name="writeStream">Destination for the gzip-compressed bytes.</param>
    /// <param name="leaveOpen">
    /// <see langword="true"/> to leave <paramref name="writeStream"/> open when this stream is disposed.
    /// </param>
    public HashDigestGZipStream(Stream writeStream, bool leaveOpen)
    {
        // Build the compressor first so an invalid destination fails before the hash object is allocated.
        compressionStream = new GZipStream(writeStream, CompressionMode.Compress, leaveOpen);
        hashAlgorithm = SHA256.Create();

        // The transformed output is discarded; only the digest accumulated by the transform matters.
        sha256Stream = new CryptoStream(Stream.Null, hashAlgorithm, CryptoStreamMode.Write);
    }

    public override bool CanWrite => true;

    /// <summary>Feeds the given bytes to both the hash and the compressor.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        sha256Stream.Write(buffer, offset, count);
        compressionStream.Write(buffer, offset, count);
    }

    /// <summary>Feeds the given bytes to both the hash and the compressor.</summary>
    public override void Write(ReadOnlySpan<byte> buffer)
    {
        sha256Stream.Write(buffer);
        compressionStream.Write(buffer);
    }

    public override void Flush()
    {
        sha256Stream.Flush();
        compressionStream.Flush();
    }

    /// <summary>
    /// Finalizes the hash and returns the SHA-256 digest of every uncompressed byte written so far.
    /// </summary>
    /// <remarks>
    /// Intended to be called once all data has been written. Safe to call more than once;
    /// repeated calls return the same digest.
    /// </remarks>
    /// <returns>The raw digest bytes.</returns>
    internal byte[] GetHash()
    {
        // FlushFinalBlock may only be invoked once per CryptoStream, so guard repeated calls.
        if (!sha256Stream.HasFlushedFinalBlock)
        {
            sha256Stream.FlushFinalBlock();
        }

        return hashAlgorithm.Hash!;
    }

    /// <summary>
    /// Releases the hash stream, the compressor (which writes the gzip trailer) and the hash algorithm.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        try
        {
            if (disposing)
            {
                // Nested finally blocks guarantee each resource is released even if an earlier Dispose throws.
                try
                {
                    sha256Stream.Dispose();
                }
                finally
                {
                    try
                    {
                        compressionStream.Dispose();
                    }
                    finally
                    {
                        // CryptoStream does not dispose its transform, so release it explicitly.
                        hashAlgorithm.Dispose();
                    }
                }
            }
        }
        finally
        {
            base.Dispose(disposing);
        }
    }

    // This class is never used with async writes, but if it ever is, implement these overrides
    public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
        => throw new NotImplementedException();
    public override ValueTask WriteAsync(ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken)
        => throw new NotImplementedException();

    public override bool CanRead => false;
    public override bool CanSeek => false;

    // The Stream contract documents NotSupportedException for members a stream does not support.
    public override long Length => throw new NotSupportedException();
    public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); }

    public override int Read(byte[] buffer, int offset, int count) => throw new NotSupportedException();
    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
    public override void SetLength(long value) => throw new NotSupportedException();
}
79157
}

Test.Microsoft.NET.Build.Containers.Filesystem/LayerEndToEnd.cs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using Microsoft.NET.Build.Containers;
2+
using System.IO.Compression;
23
using System.Security.Cryptography;
34

45
namespace Test.Microsoft.NET.Build.Containers.Filesystem;
@@ -21,19 +22,10 @@ public void SingleFileInFolder()
2122
Console.WriteLine(l.Descriptor);
2223

2324
//Assert.AreEqual("application/vnd.oci.image.layer.v1.tar", l.Descriptor.MediaType); // TODO: configurability
24-
Assert.AreEqual(2048, l.Descriptor.Size);
25+
Assert.IsTrue(l.Descriptor.Size is >= 136 and <= 138, $"'l.Descriptor.Size' should be between 136 and 138, but is {l.Descriptor.Size}"); // TODO: determinism!
2526
//Assert.AreEqual("sha256:26140bc75f2fcb3bf5da7d3b531d995c93d192837e37df0eb5ca46e2db953124", l.Descriptor.Digest); // TODO: determinism!
2627

27-
Assert.AreEqual(l.Descriptor.Size, new FileInfo(l.BackingFile).Length);
28-
29-
byte[] hashBytes;
30-
31-
using (FileStream fs = File.OpenRead(l.BackingFile))
32-
{
33-
hashBytes = SHA256.HashData(fs);
34-
}
35-
36-
Assert.AreEqual(Convert.ToHexString(hashBytes), l.Descriptor.Digest.Substring("sha256:".Length), ignoreCase: true);
28+
VerifyDescriptorInfo(l);
3729
}
3830

3931
[TestMethod]
@@ -59,19 +51,33 @@ public void TwoFilesInTwoFolders()
5951
Console.WriteLine(l.Descriptor);
6052

6153
//Assert.AreEqual("application/vnd.oci.image.layer.v1.tar", l.Descriptor.MediaType); // TODO: configurability
62-
Assert.AreEqual(3072, l.Descriptor.Size);
54+
Assert.IsTrue(l.Descriptor.Size is >= 169 and <= 173, $"'l.Descriptor.Size' should be between 169 and 173, but is {l.Descriptor.Size}"); // TODO: determinism!
6355
//Assert.AreEqual("sha256:26140bc75f2fcb3bf5da7d3b531d995c93d192837e37df0eb5ca46e2db953124", l.Descriptor.Digest); // TODO: determinism!
6456

57+
VerifyDescriptorInfo(l);
58+
}
59+
60+
/// <summary>
/// Asserts that a layer's descriptor matches its backing file: the recorded size equals the file length,
/// the digest equals the SHA-256 of the file bytes, and the uncompressed digest equals the SHA-256 of the
/// gzip-decompressed contents.
/// </summary>
private static void VerifyDescriptorInfo(Layer l)
{
    long sizeOnDisk = new FileInfo(l.BackingFile).Length;
    Assert.AreEqual(l.Descriptor.Size, sizeOnDisk);

    string compressedHex;
    string uncompressedHex;

    using (FileStream backing = File.OpenRead(l.BackingFile))
    {
        // Hash the file exactly as stored (compressed).
        compressedHex = Convert.ToHexString(SHA256.HashData(backing));

        // Rewind and hash what the gzip stream inflates to.
        backing.Position = 0;
        using GZipStream inflater = new(backing, CompressionMode.Decompress);
        uncompressedHex = Convert.ToHexString(SHA256.HashData(inflater));
    }

    // Descriptor digests carry an algorithm prefix; compare only the hex portion, case-insensitively.
    int prefixLength = "sha256:".Length;
    Assert.AreEqual(compressedHex, l.Descriptor.Digest[prefixLength..], ignoreCase: true);
    Assert.AreEqual(uncompressedHex, l.Descriptor.UncompressedDigest[prefixLength..], ignoreCase: true);
}
7682

7783
TransientTestFolder testSpecificArtifactRoot;

0 commit comments

Comments
 (0)