Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@
"no_training_data_file_uploaded": "No training data file uploaded",
"no_upload_offline": "Training data cannot be uploaded without connecting to the internet.",
"save": "Save",
"upload_failed": "The training data file could not be uploaded. Please try again or use a different file.",
"upload_failed": "The training data file could not be uploaded. Please ensure that the spreadsheet contains two columns and try again.",
"upload_training_data": "Upload Training Data",
"uploading": "Uploading..."
},
Expand Down
31 changes: 27 additions & 4 deletions src/SIL.XForge.Scripture/Services/TrainingDataService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ IOptions<SiteOptions> siteOptions
/// <summary>
/// The training directory name. This is not the full path to the directory.
/// </summary>
///
public const string DirectoryName = "training-data";
public const int NumTrainingDataColumns = 2;

private static readonly CsvConfiguration _csvConfiguration = new CsvConfiguration(CultureInfo.InvariantCulture)
{
BadDataFound = null,
Expand All @@ -50,7 +51,7 @@ public async Task ConvertExcelToCsvAsync(IWorkbook workbook, Stream outputStream
// Verify there is a worksheet
if (workbook.NumberOfSheets == 0)
{
throw new FormatException("The Excel file does not contain a worksheet");
throw new FormatException("The Excel file contains a row with fewer than two columns");
}

// Load the Excel file
Expand All @@ -59,11 +60,15 @@ public async Task ConvertExcelToCsvAsync(IWorkbook workbook, Stream outputStream
for (int rowNum = sheet.FirstRowNum; rowNum <= sheet.LastRowNum; rowNum++)
{
IRow row = sheet.GetRow(rowNum);
if (row is null || row.FirstCellNum == -1 || row.LastCellNum - row.FirstCellNum < 2)
if (row is null || row.FirstCellNum == -1)
{
// Skip if there are not two columns of data in this row that are side-by-side
continue;
}
if (row.LastCellNum - row.FirstCellNum < NumTrainingDataColumns)
{
throw new FormatException("The Excel file contains a row with fewer than two columns");
}

string firstColumn =
row.GetCell(row.FirstCellNum, MissingCellPolicy.CREATE_NULL_AS_BLANK).ToString() ?? string.Empty;
Expand Down Expand Up @@ -309,10 +314,28 @@ private async Task ConvertToCsvAsync(string path, string outputPath)
using StreamReader streamReader = new StreamReader(fileStream);
using CsvReader csvReader = new CsvReader(streamReader, _csvConfiguration);
await csvReader.ReadAsync();
if (csvReader.ColumnCount != 2)
if (csvReader.ColumnCount < NumTrainingDataColumns)
{
throw new FormatException("The CSV file does not contain two columns");
}

// Detect if a row has fewer than two columns of data
while (await csvReader.ReadAsync())
{
if (csvReader.Parser.Record.All(r => string.IsNullOrEmpty(r)))
{
// Skip empty rows
continue;
}
int[] columnsToCheck = [0, 1];
if (
csvReader.ColumnCount < NumTrainingDataColumns
|| columnsToCheck.Any(i => string.IsNullOrEmpty(csvReader.Parser.Record[i]))
)
{
throw new FormatException("The CSV file contains a row with fewer than two columns");
}
}
}

// Relocate the temporary file to the new directory
Expand Down
134 changes: 130 additions & 4 deletions test/SIL.XForge.Scripture.Tests/Services/TrainingDataServiceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,43 @@ public async Task SaveTrainingDataAsync_NotEnoughColumns()
);
}

[Test]
public async Task SaveTrainingDataAsync_RowWithMissingEntry()
{
var env = new TestEnvironment();

// Set up the input file
await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data\nValue1,"));
env.FileSystemService.OpenFile(FileCsv, FileMode.Open).Returns(fileStream);

// SUT
Assert.ThrowsAsync<FormatException>(() =>
env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv)
);
}

[Test]
public async Task SaveTrainingDataAsync_RowWithNoData()
{
var env = new TestEnvironment();

// Set up the input file
await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data\n\"\","));
env.FileSystemService.OpenFile(FileCsv, FileMode.Open).Returns(fileStream);

// SUT
Uri actual = await env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv);
Assert.That(
actual
.ToString()
.StartsWith($"/assets/{TrainingDataService.DirectoryName}/{Project01}/{User01}_{Data01}.csv?t="),
Is.True
);

env.FileSystemService.Received(1).DeleteFile(Arg.Any<string>());
env.FileSystemService.Received(1).MoveFile(Arg.Any<string>(), Arg.Any<string>());
}

[Test]
public async Task SaveTrainingDataAsync_SupportsCsvFiles()
{
Expand Down Expand Up @@ -487,18 +524,107 @@ public async Task SaveTrainingDataAsync_SupportsXlsxFiles()
}

[Test]
public async Task SaveTrainingDataAsync_TooManyColumns()
public async Task SaveTrainingDataAsync_ExcelFileRowWithMissingColumn()
{
var env = new TestEnvironment();

// Create the input file
await using var fileStream = new MemoryStream();
using var workbook = new HSSFWorkbook();
ISheet sheet = workbook.CreateSheet("Sheet1");
IRow row1 = sheet.CreateRow(0);
row1.CreateCell(0).SetCellValue("Test"); // A1
row1.CreateCell(1).SetCellValue("Data"); // A2
IRow row2 = sheet.CreateRow(1);
row2.CreateCell(0).SetCellValue("Missing"); // B1
workbook.Write(fileStream, leaveOpen: true);
fileStream.Seek(0, SeekOrigin.Begin);
env.FileSystemService.OpenFile(FileExcel2003, FileMode.Open).Returns(fileStream);

// Create the output file
string path = Path.Join(
env.SiteOptions.Value.SiteDir,
TrainingDataService.DirectoryName,
Project01,
$"{User01}_{Data01}.csv"
);
await using var outputStream = new NonDisposingMemoryStream();
env.FileSystemService.CreateFile(path).Returns(outputStream);

// SUT
Assert.ThrowsAsync<FormatException>(() =>
env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileExcel2003)
);
outputStream.ForceDispose();
}

[Test]
public async Task SaveTrainingDataAsync_ExcelFileRowWithEmptyRow()
{
var env = new TestEnvironment();

// Create the input file
await using var fileStream = new MemoryStream();
using var workbook = new HSSFWorkbook();
ISheet sheet = workbook.CreateSheet("Sheet1");
IRow row1 = sheet.CreateRow(0);
row1.CreateCell(0).SetCellValue("Test"); // A1
row1.CreateCell(1).SetCellValue("Data"); // A2
IRow row3 = sheet.CreateRow(2);
row3.CreateCell(0).SetCellValue("Skip"); // C1
row3.CreateCell(1).SetCellValue("Row"); // C2
workbook.Write(fileStream, leaveOpen: true);
fileStream.Seek(0, SeekOrigin.Begin);
env.FileSystemService.OpenFile(FileExcel2003, FileMode.Open).Returns(fileStream);

// Create the output file
string path = Path.Join(
env.SiteOptions.Value.SiteDir,
TrainingDataService.DirectoryName,
Project01,
$"{User01}_{Data01}.csv"
);
await using var outputStream = new NonDisposingMemoryStream();
env.FileSystemService.CreateFile(path).Returns(outputStream);

// SUT
Uri actual = await env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileExcel2003);
Assert.That(
actual
.ToString()
.StartsWith($"/assets/{TrainingDataService.DirectoryName}/{Project01}/{User01}_{Data01}.csv?t="),
Is.True
);

using var reader = new StreamReader(outputStream);
string text = await reader.ReadToEndAsync();
text = text.TrimEnd(); // Remove trailing new lines
text = text.Replace("\r\n", " "); // Handle newlines in both linux and windows environments
text = text.Replace("\n", " ");
Assert.AreEqual($"Test,Data Skip,Row", text);
outputStream.ForceDispose();
}

[Test]
public async Task SaveTrainingDataAsync_AllowsMoreThanTwoColumns()
{
var env = new TestEnvironment();

// Set up the input file
await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data,More"));
await using var fileStream = new MemoryStream(Encoding.UTF8.GetBytes("Test,Data,More\nSecond,Row,\"\""));
env.FileSystemService.OpenFile(FileCsv, FileMode.Open).Returns(fileStream);

// SUT
Assert.ThrowsAsync<FormatException>(() =>
env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv)
Uri actual = await env.Service.SaveTrainingDataAsync(User01, Project01, Data01, FileCsv);
Assert.That(
actual
.ToString()
.StartsWith($"/assets/{TrainingDataService.DirectoryName}/{Project01}/{User01}_{Data01}.csv?t="),
Is.True
);

env.FileSystemService.Received(1).DeleteFile(Arg.Any<string>());
env.FileSystemService.Received(1).MoveFile(Arg.Any<string>(), Arg.Any<string>());
}

/// <summary>
Expand Down
Loading