diff --git a/samples/xlsx/TestIssue809.xlsx b/samples/xlsx/TestIssue809.xlsx new file mode 100644 index 00000000..adde36fa Binary files /dev/null and b/samples/xlsx/TestIssue809.xlsx differ diff --git a/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs index e0caffcf..6b868456 100644 --- a/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs +++ b/src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs @@ -22,7 +22,6 @@ internal partial class ExcelOpenXmlSheetReader : IExcelReader private static readonly string[] _relationshiopNs = { Config.SpreadsheetmlXmlRelationshipns, Config.SpreadsheetmlXmlStrictRelationshipns }; private List _sheetRecords; internal IDictionary _sharedStrings; - private MergeCells _mergeCells; private ExcelOpenXmlStyles _style; internal readonly ExcelOpenXmlZip _archive; private readonly OpenXmlConfiguration _config; @@ -140,7 +139,6 @@ public IAsyncEnumerable> QueryRangeAsync(bool useHea return QueryImplAsync(QueryRangeAsync(false, sheetName, startRowIndex, startColumnIndex, endRowIndex, endColumnIndex, cancellationToken), ReferenceHelper.ConvertXyToCell(startColumnIndex, startRowIndex), hasHeader, _config, cancellationToken); } - [Zomp.SyncMethodGenerator.CreateSyncVersion] internal async IAsyncEnumerable> InternalQueryRangeAsync(bool useHeaderRow, string sheetName, int startRowIndex, int startColumnIndex, int? endRowIndex, int? endColumnIndex, [EnumeratorCancellation] CancellationToken cancellationToken = default) { @@ -159,15 +157,12 @@ internal async IAsyncEnumerable> InternalQueryRangeA // TODO: need to optimize performance // Q. why need 3 times openstream merge one open read? A. no, zipstream can't use position = 0 - var mergeCellsContext = new MergeCellsContext { }; - + var mergeCellsContext = new MergeCellsContext(); if (_config.FillMergedCells && !await TryGetMergeCellsAsync(sheetEntry, mergeCellsContext, cancellationToken).ConfigureAwait(false)) { yield break; } - _mergeCells = mergeCellsContext.MergeCells; - var maxRowColumnIndexResult = await TryGetMaxRowColumnIndexAsync(sheetEntry, cancellationToken).ConfigureAwait(false); if (!maxRowColumnIndexResult.IsSuccess) { @@ -223,89 +218,16 @@ internal async IAsyncEnumerable> InternalQueryRangeA break; } - // fill empty rows - if (!_config.IgnoreEmptyRows) - { - var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex; - if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex) - { - for (int i = expectedRowIndex; i < rowIndex; i++) - { - yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex); - } - } - } - - // row -> c, must after `if (nextRowIndex < rowIndex)` condition code, eg. The first empty row has no xml element,and the second row xml element is - if (!await XmlReaderHelper.ReadFirstContentAsync(reader, cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows) + await foreach (var row in QueryRowAsync(reader, isFirstRow, startRowIndex, nextRowIndex, rowIndex, startColumnIndex, endColumnIndex, maxColumnIndex, withoutCR, useHeaderRow, headRows, mergeCellsContext.MergeCells, cancellationToken).ConfigureAwait(false)) { - //Fill in case of self closed empty row tag eg. - yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex); - continue; - } - - #region Set Cells - - var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex); - var columnIndex = withoutCR ? -1 : 0; - while (!reader.EOF) - { - if (XmlReaderHelper.IsStartElement(reader, "c", _ns)) + if (isFirstRow) { - var aS = reader.GetAttribute("s"); - var aR = reader.GetAttribute("r"); - var aT = reader.GetAttribute("t"); - var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCR, - startColumnIndex, aR, aT, cancellationToken).ConfigureAwait(false); - - var cellValue = cellAndColumn.CellValue; - columnIndex = cellAndColumn.ColumnIndex; - - if (_config.FillMergedCells) - { - if (_mergeCells.MergesValues.ContainsKey(aR)) - { - _mergeCells.MergesValues[aR] = cellValue; - } - else if (_mergeCells.MergesMap.TryGetValue(aR, out var mergeKey)) - { - _mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue); - } - } - - if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value)) + isFirstRow = false; // for startcell logic + if (useHeaderRow) continue; - - if (!string.IsNullOrEmpty(aS)) // if c with s meaning is custom style need to check type by xl/style.xml - { - int xfIndex = -1; - if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, - out var styleIndex)) - xfIndex = styleIndex; - - // only when have s attribute then load styles xml data - if (_style == null) - _style = new ExcelOpenXmlStyles(_archive); - - cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue); - } - - SetCellsValueAndHeaders(cellValue, useHeaderRow, ref headRows, ref isFirstRow, ref cell, columnIndex); } - else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false)) - break; + yield return row; } - - #endregion - - if (isFirstRow) - { - isFirstRow = false; // for startcell logic - if (useHeaderRow) - continue; - } - - yield return cell; } else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false)) { @@ -321,6 +243,94 @@ internal async IAsyncEnumerable> InternalQueryRangeA } } + [Zomp.SyncMethodGenerator.CreateSyncVersion] + private async IAsyncEnumerable> QueryRowAsync( + XmlReader reader, + bool isFirstRow, + int startRowIndex, + int nextRowIndex, + int rowIndex, + int startColumnIndex, + int? endColumnIndex, + int maxColumnIndex, + bool withoutCR, + bool useHeaderRow, + Dictionary headRows, + MergeCells mergeCells, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + // fill empty rows + if (!_config.IgnoreEmptyRows) + { + var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex; + if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex) + { + for (int i = expectedRowIndex; i < rowIndex; i++) + { + yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex); + } + } + } + + // row -> c, must after `if (nextRowIndex < rowIndex)` condition code, eg. The first empty row has no xml element,and the second row xml element is + if (!await XmlReaderHelper.ReadFirstContentAsync(reader, cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows) + { + //Fill in case of self closed empty row tag eg. + yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex); + yield break; + } + + var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex); + var columnIndex = withoutCR ? -1 : 0; + while (!reader.EOF) + { + if (XmlReaderHelper.IsStartElement(reader, "c", _ns)) + { + var aS = reader.GetAttribute("s"); + var aR = reader.GetAttribute("r"); + var aT = reader.GetAttribute("t"); + var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCR, startColumnIndex, aR, aT, cancellationToken).ConfigureAwait(false); + + var cellValue = cellAndColumn.CellValue; + columnIndex = cellAndColumn.ColumnIndex; + + if (_config.FillMergedCells) + { + if (mergeCells.MergesValues.ContainsKey(aR)) + { + mergeCells.MergesValues[aR] = cellValue; + } + else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey)) + { + mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue); + } + } + + if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value)) + continue; + + if (!string.IsNullOrEmpty(aS)) // if c with s meaning is custom style need to check type by xl/style.xml + { + int xfIndex = -1; + if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, + out var styleIndex)) + xfIndex = styleIndex; + + // only when have s attribute then load styles xml data + if (_style == null) + _style = new ExcelOpenXmlStyles(_archive); + + cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue); + } + + SetCellsValueAndHeaders(cellValue, useHeaderRow, headRows, isFirstRow, cell, columnIndex); + } + else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false)) + break; + } + yield return cell; + } + [Zomp.SyncMethodGenerator.CreateSyncVersion] public static async IAsyncEnumerable QueryImplAsync(IAsyncEnumerable> values, string startCell, bool hasHeader, Configuration configuration, [EnumeratorCancellation] CancellationToken cancellationToken = default) where T : class, new() { @@ -434,7 +444,7 @@ private static IDictionary GetCell(bool useHeaderRow, int maxCol return useHeaderRow ? CustomPropertyHelper.GetEmptyExpandoObject(headRows) : CustomPropertyHelper.GetEmptyExpandoObject(maxColumnIndex, startColumnIndex); } - private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, ref Dictionary headRows, ref bool isFirstRow, ref IDictionary cell, int columnIndex) + private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, Dictionary headRows, bool isFirstRow, IDictionary cell, int columnIndex) { if (!useHeaderRow) { @@ -1093,7 +1103,7 @@ internal static async Task TryGetMergeCellsAsync(ZipArchiveEntry sheetEntr #else true #endif - ); + ); var mergeCells = new MergeCells(); using (var sheetStream = sheetEntry.Open()) using (XmlReader reader = XmlReader.Create(sheetStream, xmlSettings)) diff --git a/tests/MiniExcelTests/MiniExcelIssueTests.cs b/tests/MiniExcelTests/MiniExcelIssueTests.cs index 25cb62d4..d08241a7 100644 --- a/tests/MiniExcelTests/MiniExcelIssueTests.cs +++ b/tests/MiniExcelTests/MiniExcelIssueTests.cs @@ -20,6 +20,7 @@ using static MiniExcelLibs.Tests.MiniExcelOpenXmlTests; using MiniExcelLibs.Picture; using TableStyles = MiniExcelLibs.OpenXml.TableStyles; +using System.Threading.Tasks; namespace MiniExcelLibs.Tests; @@ -534,7 +535,7 @@ public void TestIssue401(bool autoFilter, int count) using (var connection = Db.GetConnection("Data Source=:memory:")) { connection.Open(); - + using var command = connection.CreateCommand(); command.CommandText = """ @@ -545,11 +546,11 @@ 1 as Column2 UNION ALL SELECT 'Github', 2 """; - + using var reader = command.ExecuteReader(); MiniExcel.SaveAs(path.ToString(), reader, configuration: config); } - + var xml = Helpers.GetZipFileContent(path.ToString(), "xl/worksheets/sheet1.xml"); var cnt = Regex.Matches(xml, "autoFilter").Count; Assert.Equal(count, cnt); @@ -2925,7 +2926,7 @@ public void Issue206() dt.Columns.Add("name"); dt.Columns.Add("department"); dt.Rows.Add("Jack", "HR"); - + var value = new Dictionary { ["employees"] = dt }; MiniExcel.SaveAsByTemplate(path.ToString(), templatePath, value); @@ -3644,12 +3645,12 @@ public void Issue459() var values = new { title = "FooCompany", - managers = new[] + managers = new[] { new { name = "Jack", department = "HR" }, new { name = "Loan", department = "IT" } }, - employees = new[] + employees = new[] { new { name = "Wade", department = "HR" }, new { name = "Felix", department = "HR" }, @@ -3657,10 +3658,10 @@ public void Issue459() new { name = "Keaton", department = "IT" } } }; - + ms.SaveAsByTemplate(template, values); } - + [Fact] public void Issue527() { @@ -3669,10 +3670,10 @@ public void Issue527() new() { Name = "Bill", UserType = DescriptionEnum.V1 }, new() { Name = "Bob", UserType = DescriptionEnum.V2 } ]; - + var value = new { t = row }; var template = PathHelper.GetFile("xlsx/Issue527Template.xlsx"); - + using var path = AutoDeletingPath.Create(); MiniExcel.SaveAsByTemplate(path.FilePath, template, value); @@ -3695,9 +3696,9 @@ public void TestIssue584() using var conn = Db.GetConnection(); conn.Open(); - + using var cmd = conn.CreateCommand(); - cmd.CommandText = + cmd.CommandText = """ WITH test('Id', 'Name') AS ( VALUES @@ -4248,10 +4249,10 @@ public void TestIssue768() var list = Enumerable.Range(0, 10) .Select(_ => new - { - value1 = Guid.NewGuid(), - value2 = Guid.NewGuid() - } + { + value1 = Guid.NewGuid(), + value2 = Guid.NewGuid() + } ) .ToList(); @@ -4266,7 +4267,7 @@ public void TestIssue768() Assert.Equal(list[0].value1.ToString(), rows[0].A.ToString()); Assert.Equal(list[1].value1.ToString(), rows[1].A.ToString()); } - + /// /// https://github.com/mini-software/MiniExcel/issues/186 /// @@ -4276,7 +4277,7 @@ public void TestIssue186() var originPath = PathHelper.GetFile("xlsx/TestIssue186_Template.xlsx"); using var path = AutoDeletingPath.Create(); File.Copy(originPath, path.FilePath); - + MiniExcelPicture[] images = [ new() @@ -4295,10 +4296,10 @@ public void TestIssue186() HeightPx = 100 } ]; - + MiniExcel.AddPicture(path.FilePath, images); } - + /// /// https://github.com/mini-software/MiniExcel/issues/771 /// @@ -4307,7 +4308,7 @@ public void TestIssue771() { var template = PathHelper.GetFile("xlsx/TestIssue771.xlsx"); using var path = AutoDeletingPath.Create(); - + var value = new { list = GetEnumerable(), @@ -4323,10 +4324,10 @@ public void TestIssue771() list11 = GetEnumerable(), list12 = GetEnumerable() }; - + MiniExcel.SaveAsByTemplate(path.FilePath, template, value); var rows = MiniExcel.Query(path.FilePath).ToList(); - + Assert.Equal("2025-1", rows[2].B); Assert.Equal(null, rows[3].B); Assert.Equal(null, rows[4].B); @@ -4335,7 +4336,7 @@ public void TestIssue771() IEnumerable GetEnumerable() => Enumerable.Range(0, 3).Select(s => new { ID = Guid.NewGuid(), level = s }); } - + /// /// https://github.com/mini-software/MiniExcel/issues/772 /// @@ -4343,10 +4344,10 @@ public void TestIssue771() public void TestIssue772() { var path = PathHelper.GetFile("xlsx/TestIssue772.xlsx"); - var rows = MiniExcel.Query(path, sheetName: "Supply plan(daily)", startCell:"A1") + var rows = MiniExcel.Query(path, sheetName: "Supply plan(daily)", startCell: "A1") .Cast>() .ToArray(); - + Assert.Equal("01108083-1Delta", (string)rows[19]["C"]); } @@ -4369,7 +4370,7 @@ public void TestIssue773() MiniExcel.SaveAsByTemplate(path.FilePath, templatePath, fill); var rows = MiniExcel.Query(path.FilePath).ToList(); - + Assert.Equal("H1", rows[4].AF); Assert.Equal("c3", rows[6].AA); Assert.Equal("Ram", rows[6].B); @@ -4393,4 +4394,17 @@ public void TestIssue789() Assert.Contains("", xml); } + + /// + /// https://github.com/mini-software/MiniExcel/issues/809 + /// + [Fact] + public void TestIssue809() + { + var path = PathHelper.GetFile("xlsx/TestIssue809.xlsx"); + var rows = MiniExcel.Query(path).ToList(); + Assert.Equal(3, rows.Count); + Assert.Equal(null, rows[0].A); + Assert.Equal(2, rows[2].B); + } } \ No newline at end of file