Skip to content

Commit 103db85

Browse files
Stream archives to Azure Blob Storage in bbs2gh migrate-repo, rather than loading the whole archive into memory (#978)
When running a `bbs2gh migrate-repo` migration with Azure Blob Storage where the archive is larger than 2GB, currently it fails with a `System.IO.IOException` stating: > The file is too long. This operation is currently limited to supporting files less than 2 gigabytes in size. This switches to streaming the file, rather than reading it all into memory, mirroring the approach (and using the code!) from `gh gei`. Fixes #974. <!-- For the checkboxes below you must check each one to indicate that you either did the relevant task, or considered it and decided there was nothing that needed doing --> - [x] Did you write/update appropriate tests - [x] Release notes updated (if appropriate) - [x] Appropriate logging output - [x] Issue linked - [ ] Docs updated (or issue created) - [ ] New package licenses are added to `ThirdPartyNotices.txt` (if applicable) <!-- For docs we should review the docs at: https://docs.github.com/en/migrations/using-github-enterprise-importer and the README.md in this repo If a doc update is required based on the changes in this PR, it is sufficient to create an issue and link to it here. The doc update can be made later/separately. --> --------- Co-authored-by: Dylan Smith <dylanfromwinnipeg@gmail.com>
1 parent 7bf64fe commit 103db85

File tree

3 files changed

+20
-16
lines changed

3 files changed

+20
-16
lines changed

RELEASENOTES.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
- Include the migration ID in the default output filename when running `download-logs`
22
- Include the date with the timestamp when writing to the log
33
- When blob storage credentials are provided to the CLI but will not be used for a GHES migration, log a clear warning, not an info message
4-
- Unhide the `--archive-download-host` argument in the documentation for `gh bbs2gh migrate-repo` and `gh bbs2gh generate-script`
4+
- Fix support for Azure Blob Storage in `bbs2gh` migrations when the archive is larger than 2GB
5+
- Unhide the `--archive-download-host` argument in the documentation for `gh bbs2gh migrate-repo` and `gh bbs2gh generate-script`

src/OctoshiftCLI.Tests/bbs2gh/Handlers/MigrateRepoCommandHandlerTests.cs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.IO;
23
using System.Text;
34
using System.Threading.Tasks;
45
using FluentAssertions;
@@ -156,7 +157,7 @@ public async Task Happy_Path_Generate_Archive_Ssh_Download_Azure_Upload_And_Inge
156157
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
157158
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
158159
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
159-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
160+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
160161
_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
161162
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);
162163

@@ -240,8 +241,7 @@ public async Task Happy_Path_Full_Flow_Running_On_Bbs_Server()
240241

241242
_mockBbsApi.Setup(x => x.StartExport(BBS_PROJECT, BBS_REPO)).ReturnsAsync(BBS_EXPORT_ID);
242243
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
243-
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(archivePath)).ReturnsAsync(ARCHIVE_DATA);
244-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
244+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
245245
_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
246246
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);
247247

@@ -296,7 +296,7 @@ public async Task Happy_Path_Full_Flow_Bbs_Credentials_Via_Environment()
296296
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
297297
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
298298
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
299-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
299+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
300300
_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
301301
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);
302302

@@ -335,7 +335,7 @@ public async Task Happy_Path_Deletes_Downloaded_Archive()
335335
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
336336
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
337337
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
338-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
338+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
339339

340340
// Act
341341
var args = new MigrateRepoCommandArgs
@@ -367,7 +367,7 @@ public async Task It_Deletes_Downloaded_Archive_Even_If_Upload_Fails()
367367
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
368368
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
369369
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
370-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ThrowsAsync(new InvalidOperationException());
370+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ThrowsAsync(new InvalidOperationException());
371371

372372
// Act
373373
var args = new MigrateRepoCommandArgs
@@ -398,7 +398,7 @@ public async Task Happy_Path_Does_Not_Throw_If_Fails_To_Delete_Downloaded_Archiv
398398
_mockBbsApi.Setup(x => x.GetExport(BBS_EXPORT_ID)).ReturnsAsync(("COMPLETED", "The export is complete", 100));
399399
_mockBbsArchiveDownloader.Setup(x => x.Download(BBS_EXPORT_ID, It.IsAny<string>())).ReturnsAsync(ARCHIVE_PATH);
400400
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(ARCHIVE_DATA);
401-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), ARCHIVE_DATA)).ReturnsAsync(new Uri(ARCHIVE_URL));
401+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
402402
_mockFileSystemProvider.Setup(x => x.DeleteIfExists(It.IsAny<string>())).Throws(new UnauthorizedAccessException("Access Denied"));
403403

404404
// Act
@@ -616,7 +616,7 @@ public async Task Uses_Archive_Path_If_Provided()
616616
var archiveBytes = Encoding.ASCII.GetBytes("here are some bytes");
617617
_mockFileSystemProvider.Setup(x => x.ReadAllBytesAsync(ARCHIVE_PATH)).ReturnsAsync(archiveBytes);
618618

619-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), archiveBytes)).ReturnsAsync(new Uri(ARCHIVE_URL));
619+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
620620

621621
_mockGithubApi.Setup(x => x.GetOrganizationId(GITHUB_ORG).Result).Returns(GITHUB_ORG_ID);
622622
_mockGithubApi.Setup(x => x.CreateBbsMigrationSource(GITHUB_ORG_ID).Result).Returns(MIGRATION_SOURCE_ID);
@@ -763,7 +763,7 @@ await _handler.Invoking(x => x.Handle(args))
763763
public async Task It_Does_Not_Set_The_Archive_Path_When_Archive_Path_Is_Provided()
764764
{
765765
// Arrange
766-
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<byte[]>())).ReturnsAsync(new Uri(ARCHIVE_URL));
766+
_mockAzureApi.Setup(x => x.UploadToBlob(It.IsAny<string>(), It.IsAny<FileStream>())).ReturnsAsync(new Uri(ARCHIVE_URL));
767767

768768
// Act
769769
var args = new MigrateRepoCommandArgs
@@ -778,7 +778,7 @@ public async Task It_Does_Not_Set_The_Archive_Path_When_Archive_Path_Is_Provided
778778

779779
// Assert
780780
args.ArchivePath.Should().Be(ARCHIVE_PATH);
781-
_mockFileSystemProvider.Verify(m => m.ReadAllBytesAsync(ARCHIVE_PATH));
781+
_mockFileSystemProvider.Verify(m => m.OpenRead(ARCHIVE_PATH));
782782
}
783783

784784
[Fact]

src/bbs2gh/Handlers/MigrateRepoCommandHandler.cs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,14 @@ private async Task<string> UploadArchiveToAzure(string archivePath)
188188
{
189189
_log.LogInformation("Uploading Archive to Azure...");
190190

191-
var archiveData = await _fileSystemProvider.ReadAllBytesAsync(archivePath);
192-
var archiveName = GenerateArchiveName();
193-
var archiveBlobUrl = await _azureApi.UploadToBlob(archiveName, archiveData);
194-
195-
return archiveBlobUrl.ToString();
191+
#pragma warning disable IDE0063
192+
await using (var archiveData = _fileSystemProvider.OpenRead(archivePath))
193+
#pragma warning restore IDE0063
194+
{
195+
var archiveName = GenerateArchiveName();
196+
var archiveBlobUrl = await _azureApi.UploadToBlob(archiveName, archiveData);
197+
return archiveBlobUrl.ToString();
198+
}
196199
}
197200

198201
private string GenerateArchiveName() => $"{Guid.NewGuid()}.tar";

0 commit comments

Comments
 (0)