diff --git a/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json b/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json index 06fed1bdc47..756e8760422 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json +++ b/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json @@ -810,7 +810,7 @@ "no_training_data_file_uploaded": "No training data file uploaded", "no_upload_offline": "Training data cannot be uploaded without connecting to the internet.", "save": "Save", - "upload_failed": "The training data file could not be uploaded. Please try again or use a different file.", + "upload_failed": "The training data file could not be uploaded. Please ensure that the spreadsheet contains two columns and try again.", "upload_training_data": "Upload Training Data", "uploading": "Uploading..." }, diff --git a/src/SIL.XForge.Scripture/Services/TrainingDataService.cs b/src/SIL.XForge.Scripture/Services/TrainingDataService.cs index 9d1e4df8bc1..4e250168a15 100644 --- a/src/SIL.XForge.Scripture/Services/TrainingDataService.cs +++ b/src/SIL.XForge.Scripture/Services/TrainingDataService.cs @@ -28,8 +28,9 @@ IOptions siteOptions /// /// The training directory name. This is not the full path to the directory. /// - /// public const string DirectoryName = "training-data"; + public const int NumTrainingDataColumns = 2; + private static readonly CsvConfiguration _csvConfiguration = new CsvConfiguration(CultureInfo.InvariantCulture) { BadDataFound = null, @@ -50,7 +51,7 @@ public async Task ConvertExcelToCsvAsync(IWorkbook workbook, Stream outputStream // Verify there is a worksheet if (workbook.NumberOfSheets == 0) { - throw new FormatException("The Excel file does not contain a worksheet"); + throw new FormatException("The Excel file contains a row with fewer than two columns"); } // Load the Excel file @@ -59,11 +60,15 @@ public async Task ConvertExcelToCsvAsync(IWorkbook workbook, Stream outputStream for (int rowNum = sheet.FirstRowNum; rowNum <= sheet.LastRowNum; rowNum++) { IRow row = sheet.GetRow(rowNum); - if (row is null || row.FirstCellNum == -1 || row.LastCellNum - row.FirstCellNum < 2) + if (row is null || row.FirstCellNum == -1) { // Skip if there are not two columns of data in this row that are side-by-side continue; } + if (row.LastCellNum - row.FirstCellNum < NumTrainingDataColumns) + { + throw new FormatException("The Excel file contains a row with fewer than two columns"); + } string firstColumn = row.GetCell(row.FirstCellNum, MissingCellPolicy.CREATE_NULL_AS_BLANK).ToString() ?? string.Empty; @@ -309,10 +314,28 @@ private async Task ConvertToCsvAsync(string path, string outputPath) using StreamReader streamReader = new StreamReader(fileStream); using CsvReader csvReader = new CsvReader(streamReader, _csvConfiguration); await csvReader.ReadAsync(); - if (csvReader.ColumnCount != 2) + if (csvReader.ColumnCount < NumTrainingDataColumns) { throw new FormatException("The CSV file does not contain two columns"); } + + // Detect if a row has fewer than two columns of data + while (await csvReader.ReadAsync()) + { + if (csvReader.Parser.Record.All(r => string.IsNullOrEmpty(r))) + { + // Skip empty rows + continue; + } + int[] columnsToCheck = [0, 1]; + if ( + csvReader.ColumnCount < NumTrainingDataColumns + || columnsToCheck.Any(i => string.IsNullOrEmpty(csvReader.Parser.Record[i])) + ) + { + throw new FormatException("The CSV file contains a row with fewer than two columns"); + } + } } // Relocate the temporary file to the new directory diff --git a/test/SIL.XForge.Scripture.Tests/Services/TrainingDataServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/TrainingDataServiceTests.cs index feb95c02b06..7a6eb1d1d87 100644 --- a/test/SIL.XForge.Scripture.Tests/Services/TrainingDataServiceTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Services/TrainingDataServiceTests.cs @@ -332,6 +332,43 @@ public async Task SaveTrainingDataAsync_NotEnoughColumns() ); } + [Test] + public async Task SaveTrainingDataAsync_RowWithMissingEntry() + { + var env = new TestEnvironment(); + + // Set up the input file + await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data\nValue1,")); + env.FileSystemService.OpenFile(FileCsv, FileMode.Open).Returns(fileStream); + + // SUT + Assert.ThrowsAsync(() => + env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv) + ); + } + + [Test] + public async Task SaveTrainingDataAsync_RowWithNoData() + { + var env = new TestEnvironment(); + + // Set up the input file + await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data\n\"\",")); + env.FileSystemService.OpenFile(FileCsv, FileMode.Open).Returns(fileStream); + + // SUT + Uri actual = await env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv); + Assert.That( + actual + .ToString() + .StartsWith($"/assets/{TrainingDataService.DirectoryName}/{Project01}/{User01}_{Data01}.csv?t="), + Is.True + ); + + env.FileSystemService.Received(1).DeleteFile(Arg.Any()); + env.FileSystemService.Received(1).MoveFile(Arg.Any(), Arg.Any()); + } + [Test] public async Task SaveTrainingDataAsync_SupportsCsvFiles() { @@ -487,18 +524,107 @@ public async Task SaveTrainingDataAsync_SupportsXlsxFiles() } [Test] - public async Task SaveTrainingDataAsync_TooManyColumns() + public async Task SaveTrainingDataAsync_ExcelFileRowWithMissingColumn() + { + var env = new TestEnvironment(); + + // Create the input file + await using var fileStream = new MemoryStream(); + using var workbook = new HSSFWorkbook(); + ISheet sheet = workbook.CreateSheet("Sheet1"); + IRow row1 = sheet.CreateRow(0); + row1.CreateCell(0).SetCellValue("Test"); // A1 + row1.CreateCell(1).SetCellValue("Data"); // A2 + IRow row2 = sheet.CreateRow(1); + row2.CreateCell(0).SetCellValue("Missing"); // B1 + workbook.Write(fileStream, leaveOpen: true); + fileStream.Seek(0, SeekOrigin.Begin); + env.FileSystemService.OpenFile(FileExcel2003, FileMode.Open).Returns(fileStream); + + // Create the output file + string path = Path.Join( + env.SiteOptions.Value.SiteDir, + TrainingDataService.DirectoryName, + Project01, + $"{User01}_{Data01}.csv" + ); + await using var outputStream = new NonDisposingMemoryStream(); + env.FileSystemService.CreateFile(path).Returns(outputStream); + + // SUT + Assert.ThrowsAsync(() => + env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileExcel2003) + ); + outputStream.ForceDispose(); + } + + [Test] + public async Task SaveTrainingDataAsync_ExcelFileRowWithEmptyRow() + { + var env = new TestEnvironment(); + + // Create the input file + await using var fileStream = new MemoryStream(); + using var workbook = new HSSFWorkbook(); + ISheet sheet = workbook.CreateSheet("Sheet1"); + IRow row1 = sheet.CreateRow(0); + row1.CreateCell(0).SetCellValue("Test"); // A1 + row1.CreateCell(1).SetCellValue("Data"); // A2 + IRow row3 = sheet.CreateRow(2); + row3.CreateCell(0).SetCellValue("Skip"); // C1 + row3.CreateCell(1).SetCellValue("Row"); // C2 + workbook.Write(fileStream, leaveOpen: true); + fileStream.Seek(0, SeekOrigin.Begin); + env.FileSystemService.OpenFile(FileExcel2003, FileMode.Open).Returns(fileStream); + + // Create the output file + string path = Path.Join( + env.SiteOptions.Value.SiteDir, + TrainingDataService.DirectoryName, + Project01, + $"{User01}_{Data01}.csv" + ); + await using var outputStream = new NonDisposingMemoryStream(); + env.FileSystemService.CreateFile(path).Returns(outputStream); + + // SUT + Uri actual = await env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileExcel2003); + Assert.That( + actual + .ToString() + .StartsWith($"/assets/{TrainingDataService.DirectoryName}/{Project01}/{User01}_{Data01}.csv?t="), + Is.True + ); + + using var reader = new StreamReader(outputStream); + string text = await reader.ReadToEndAsync(); + text = text.TrimEnd(); // Remove trailing new lines + text = text.Replace("\r\n", " "); // Handle newlines in both linux and windows environments + text = text.Replace("\n", " "); + Assert.AreEqual($"Test,Data Skip,Row", text); + outputStream.ForceDispose(); + } + + [Test] + public async Task SaveTrainingDataAsync_AllowsMoreThanTwoColumns() { var env = new TestEnvironment(); // Set up the input file - await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data,More")); + await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data,More\nSecond,Row,\"\"")); env.FileSystemService.OpenFile(FileCsv, FileMode.Open).Returns(fileStream); // SUT - Assert.ThrowsAsync(() => - env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv) + Uri actual = await env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv); + Assert.That( + actual + .ToString() + .StartsWith($"/assets/{TrainingDataService.DirectoryName}/{Project01}/{User01}_{Data01}.csv?t="), + Is.True ); + + env.FileSystemService.Received(1).DeleteFile(Arg.Any()); + env.FileSystemService.Received(1).MoveFile(Arg.Any(), Arg.Any()); } ///