Skip to content

Commit 9bbd350

Browse files
hfishback01dylan-smithArin Ghazarian
authored
Downloading large files (>2GB) (#854)
Closes #746 Implements new logic to stream large files to disc. This allows the CLI to handle large files (>2GB) without crashing. **NOTE:** Tested using 3.1GB file and found that this can take between 20-35 minutes to run the following functions: [CopyToAsync](https://github.com/github/gh-gei/pull/854/files#diff-d48bd1e08d046c8f7840ed1200352f9ecb31fd3b2507ca0f30aa72c51a65a033R69) and [UploadAsync](https://github.com/github/gh-gei/pull/854/files#diff-8e186dae5def84fc516adc23e757853fe4e5855f26ee7e384a6c0a157a60585bR76) (total of 40min to 1hr 10min). This could be specific to my local environment as, when developing the logic on @dylan-smith's computer, the CopyToAsync func did not take this long. - [x] Did you write/update appropriate tests - [x] Release notes updated (if appropriate) - [x] Appropriate logging output - [x] Issue linked - [x] Docs updated (or issue created) - [x] New package licenses are added to `ThirdPartyNotices.txt` (if applicable) --------- Co-authored-by: Dylan Smith <dylan-smith@github.com> Co-authored-by: Arin Ghazarian <aringhazarian@github.com>
1 parent 8464c78 commit 9bbd350

File tree

13 files changed

+154
-92
lines changed

13 files changed

+154
-92
lines changed

RELEASENOTES.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
- Fix `gh bbs2gh grant-migrator-role` so it doesn't throw `System.InvalidOperationException`
22
- Rename `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` environment variables to `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` respectively to align with the environment variables that the AWS CLI already uses. Old environment variables are still supported but they will be removed in future.
3-
- Send a `User-Agent` header with the current CLI version when downloading migration archives from GitHub Enterprise Server
3+
- Send a `User-Agent` header with the current CLI version when downloading migration archives from GitHub Enterprise Server
4+
- Add support for migration archives larger than 2GB when using the blob storage flow

src/Octoshift/AwsApi.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,9 @@ public virtual async Task<string> UploadToBucket(string bucketName, string fileN
4545
return GetPreSignedUrlForFile(bucketName, keyName);
4646
}
4747

48-
public virtual async Task<string> UploadToBucket(string bucketName, byte[] bytes, string keyName)
48+
public virtual async Task<string> UploadToBucket(string bucketName, Stream content, string keyName)
4949
{
50-
using var byteStream = new MemoryStream(bytes);
51-
await _transferUtility.UploadAsync(byteStream, bucketName, keyName);
50+
await _transferUtility.UploadAsync(content, bucketName, keyName);
5251
return GetPreSignedUrlForFile(bucketName, keyName);
5352
}
5453

src/Octoshift/AzureApi.cs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.IO;
23
using System.Net.Http;
34
using System.Threading.Tasks;
45
using Azure.Storage.Blobs;
@@ -15,6 +16,7 @@ public class AzureApi
1516
private readonly OctoLogger _log;
1617
private const string CONTAINER_PREFIX = "migration-archives";
1718
private const int AUTHORIZATION_TIMEOUT_IN_HOURS = 48;
19+
private const int DEFAULT_BLOCK_SIZE = 4 * 1024 * 1024;
1820

1921
public AzureApi(HttpClient client, BlobServiceClient blobServiceClient, OctoLogger log)
2022
{
@@ -39,6 +41,12 @@ public virtual async Task<byte[]> DownloadArchive(string fromUrl)
3941
}
4042

4143
public virtual async Task<Uri> UploadToBlob(string fileName, byte[] content)
44+
{
45+
using var memoryStream = new MemoryStream(content);
46+
return await UploadToBlob(fileName, memoryStream);
47+
}
48+
49+
public virtual async Task<Uri> UploadToBlob(string fileName, Stream content)
4250
{
4351
var containerClient = await CreateBlobContainerAsync();
4452
var blobClient = containerClient.GetBlobClient(fileName);
@@ -47,13 +55,12 @@ public virtual async Task<Uri> UploadToBlob(string fileName, byte[] content)
4755
{
4856
TransferOptions = new Azure.Storage.StorageTransferOptions()
4957
{
50-
InitialTransferSize = 4 * 1024 * 1024,
51-
MaximumTransferSize = 4 * 1024 * 1024
58+
InitialTransferSize = DEFAULT_BLOCK_SIZE,
59+
MaximumTransferSize = DEFAULT_BLOCK_SIZE
5260
},
5361
};
5462

55-
var binaryDataContent = new BinaryData(content);
56-
await blobClient.UploadAsync(binaryDataContent, options);
63+
await blobClient.UploadAsync(content, options);
5764
return GetServiceSasUriForBlob(blobClient);
5865
}
5966

src/Octoshift/FileSystemProvider.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ public class FileSystemProvider
1515

1616
public virtual FileStream Open(string path, FileMode mode) => File.Open(path, mode);
1717

18+
public virtual FileStream OpenRead(string path) => File.OpenRead(path);
19+
1820
public virtual async Task WriteAllTextAsync(string path, string contents) => await File.WriteAllTextAsync(path, contents);
1921

2022
public virtual async ValueTask WriteAsync(FileStream fileStream, ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default)
@@ -34,4 +36,14 @@ public virtual void DeleteIfExists(string path)
3436
File.Delete(path);
3537
}
3638
}
39+
40+
public virtual string GetTempFileName() => Path.GetTempFileName();
41+
42+
public virtual async Task CopySourceToTargetStreamAsync(Stream source, Stream target)
43+
{
44+
if (source != null)
45+
{
46+
await source.CopyToAsync(target);
47+
}
48+
}
3749
}

src/Octoshift/HttpDownloadService.cs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ namespace OctoshiftCLI
1111
{
1212
public class HttpDownloadService
1313
{
14-
internal Func<string, string, Task> WriteToFile = async (path, contents) => await File.WriteAllTextAsync(path, contents);
15-
1614
private readonly OctoLogger _log;
1715
private readonly HttpClient _httpClient;
16+
private readonly FileSystemProvider _fileSystemProvider;
1817

19-
public HttpDownloadService(OctoLogger log, HttpClient httpClient, IVersionProvider versionProvider)
18+
public HttpDownloadService(OctoLogger log, HttpClient httpClient, FileSystemProvider fileSystemProvider, IVersionProvider versionProvider)
2019
{
2120
_log = log;
2221
_httpClient = httpClient;
22+
_fileSystemProvider = fileSystemProvider;
2323

2424
if (_httpClient is not null)
2525
{
@@ -36,13 +36,14 @@ public virtual async Task DownloadToFile(string url, string file)
3636
{
3737
_log.LogVerbose($"HTTP GET: {url}");
3838

39-
using var response = await _httpClient.GetAsync(url);
39+
using var response = await _httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead);
4040
_log.LogVerbose($"RESPONSE ({response.StatusCode}): <truncated>");
4141

4242
response.EnsureSuccessStatusCode();
4343

44-
var contents = await response.Content.ReadAsStringAsync();
45-
await WriteToFile(file, contents);
44+
await using var streamToReadFrom = await response.Content.ReadAsStreamAsync();
45+
await using var streamToWriteTo = _fileSystemProvider.Open(file, FileMode.Create);
46+
await _fileSystemProvider.CopySourceToTargetStreamAsync(streamToReadFrom, streamToWriteTo);
4647
}
4748

4849
public virtual async Task<byte[]> DownloadToBytes(string url)

src/OctoshiftCLI.Tests/AwsApiTests.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,19 @@ public async Task UploadFileToBucket_Should_Succeed()
3232

3333
var result = await awsApi.UploadToBucket(bucketName, fileName, keyName);
3434

35-
// Assert
35+
// Assert
3636
result.Should().Be(url);
3737
transferUtility.Verify(m => m.UploadAsync(fileName, bucketName, keyName, It.IsAny<CancellationToken>()));
3838
}
3939

4040
[Fact]
41-
public async Task UploadToBucket_Uploads_Byte_Array()
41+
public async Task UploadToBucket_Uploads_FileStream()
4242
{
4343
// Arrange
4444
var bucketName = "bucket";
4545
var bytes = Encoding.ASCII.GetBytes("here are some bytes");
46+
using var stream = new MemoryStream();
47+
stream.Write(bytes, 0, bytes.Length);
4648
var keyName = "key";
4749
var url = "http://example.com/file.zip";
4850

@@ -53,7 +55,7 @@ public async Task UploadToBucket_Uploads_Byte_Array()
5355
transferUtility.Setup(m => m.S3Client).Returns(s3Client.Object);
5456
using var awsApi = new AwsApi(transferUtility.Object);
5557

56-
var result = await awsApi.UploadToBucket(bucketName, bytes, keyName);
58+
var result = await awsApi.UploadToBucket(bucketName, stream, keyName);
5759

5860
// Assert
5961
result.Should().Be(url);

src/OctoshiftCLI.Tests/AzureApiTests.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.IO;
34
using System.Linq;
45
using System.Net.Http;
56
using System.Text;
@@ -23,7 +24,9 @@ public async Task UploadToBlob_Should_Fail_On_Invalid_Credentials()
2324
var blobContainerClient = new Mock<BlobContainerClient>();
2425
var blobClient = new Mock<BlobClient>();
2526
var fileName = "file.zip";
26-
var content = Encoding.UTF8.GetBytes("Upload content").ToArray();
27+
var bytes = Encoding.UTF8.GetBytes("Upload content").ToArray();
28+
using var content = new MemoryStream();
29+
content.Write(bytes, 0, bytes.Length);
2730

2831
var azureApi = new AzureApi(client.Object, blobServiceClient.Object, TestHelpers.CreateMock<OctoLogger>().Object);
2932

src/OctoshiftCLI.Tests/HttpDownloadServiceTests.cs

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using System.IO;
12
using System.Net;
23
using System.Net.Http;
34
using System.Text;
@@ -15,14 +16,14 @@ public class HttpDownloadServiceTests
1516
{
1617
private const string EXPECTED_RESPONSE_CONTENT = "RESPONSE_CONTENT";
1718
private readonly Mock<OctoLogger> _mockOctoLogger = TestHelpers.CreateMock<OctoLogger>();
19+
private readonly Mock<FileSystemProvider> _mockFileSystemProvider = TestHelpers.CreateMock<FileSystemProvider>();
1820

1921
[Fact]
2022
public async Task Downloads_File()
2123
{
2224
// Arrange
2325
var url = "https://objects-staging-origin.githubusercontent.com/octoshiftmigrationlogs/github/example-repo.txt";
2426
var filePath = "example-file";
25-
var fileContents = (string)null;
2627
var expectedFileContents = "expected-file-contents";
2728

2829
using var httpResponse = new HttpResponseMessage(HttpStatusCode.OK)
@@ -41,21 +42,25 @@ public async Task Downloads_File()
4142
.ReturnsAsync(httpResponse);
4243

4344
using var httpClient = new HttpClient(mockHttpHandler.Object);
45+
_mockFileSystemProvider.Setup(x => x.Open(filePath, It.IsAny<FileMode>())).Returns(It.IsAny<FileStream>());
4446

45-
// Act
46-
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, null)
47+
string actualFileContents = null;
48+
_mockFileSystemProvider.Setup(x => x.CopySourceToTargetStreamAsync(It.IsAny<Stream>(), It.IsAny<Stream>())).Callback<Stream, Stream>((s, _) =>
4749
{
48-
WriteToFile = (_, contents) =>
49-
{
50-
fileContents = contents;
51-
return Task.CompletedTask;
52-
}
53-
};
50+
using var ms = new MemoryStream();
51+
s.CopyTo(ms);
52+
actualFileContents = Encoding.UTF8.GetString(ms.ToArray());
53+
});
5454

55+
56+
// Act
57+
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, _mockFileSystemProvider.Object, null);
5558
await httpDownloadService.DownloadToFile(url, filePath);
5659

5760
// Assert
58-
fileContents.Should().Be(expectedFileContents);
61+
_mockFileSystemProvider.Verify(m => m.Open(filePath, FileMode.Create), Times.Once);
62+
_mockFileSystemProvider.Verify(m => m.CopySourceToTargetStreamAsync(It.IsAny<Stream>(), It.IsAny<Stream>()), Times.Once);
63+
actualFileContents.Should().Be(expectedFileContents);
5964
}
6065

6166
[Fact]
@@ -72,7 +77,7 @@ public void It_Sets_User_Agent_Header_With_Comments()
7277
mockVersionProvider.Setup(m => m.GetVersionComments()).Returns(versionComments);
7378

7479
// Act
75-
_ = new HttpDownloadService(_mockOctoLogger.Object, httpClient, mockVersionProvider.Object);
80+
_ = new HttpDownloadService(_mockOctoLogger.Object, httpClient, _mockFileSystemProvider.Object, mockVersionProvider.Object);
7681

7782
// Assert
7883
httpClient.DefaultRequestHeaders.UserAgent.Should().HaveCount(2);
@@ -85,7 +90,6 @@ public async Task Raises_Exception_When_File_Cannot_Be_Downloaded()
8590
// Arrange
8691
var url = "https://objects-staging-origin.githubusercontent.com/octoshiftmigrationlogs/github/example-repo.txt";
8792
var filePath = "example-file";
88-
var fileContents = (string)null;
8993

9094
using var httpResponse = new HttpResponseMessage(HttpStatusCode.NotFound);
9195

@@ -101,15 +105,10 @@ public async Task Raises_Exception_When_File_Cannot_Be_Downloaded()
101105

102106
using var httpClient = new HttpClient(mockHttpHandler.Object);
103107

108+
_mockFileSystemProvider.Setup(x => x.Open(filePath, System.IO.FileMode.Open)).Returns(It.IsAny<FileStream>());
109+
104110
// Act
105-
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, null)
106-
{
107-
WriteToFile = (_, contents) =>
108-
{
109-
fileContents = contents;
110-
return Task.CompletedTask;
111-
}
112-
};
111+
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, _mockFileSystemProvider.Object, null);
113112

114113
// Assert
115114
await FluentActions
@@ -137,7 +136,7 @@ public async Task DownloadArchive_Should_Succeed()
137136

138137
using var httpClient = new HttpClient(handlerMock.Object);
139138

140-
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, null);
139+
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, _mockFileSystemProvider.Object, null);
141140

142141
// Act
143142
var archiveContent = await httpDownloadService.DownloadToBytes(url);
@@ -163,7 +162,7 @@ public async Task DownloadArchive_Should_Throw_HttpRequestException_On_Non_Succe
163162
.ReturnsAsync(httpResponse);
164163

165164
using var httpClient = new HttpClient(handlerMock.Object);
166-
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, null);
165+
var httpDownloadService = new HttpDownloadService(_mockOctoLogger.Object, httpClient, _mockFileSystemProvider.Object, null);
167166

168167
// Act, Assert
169168
await httpDownloadService

0 commit comments

Comments
 (0)