Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added samples/xlsx/TestIssue809.xlsx
Binary file not shown.
182 changes: 96 additions & 86 deletions src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ internal partial class ExcelOpenXmlSheetReader : IExcelReader
private static readonly string[] _relationshiopNs = { Config.SpreadsheetmlXmlRelationshipns, Config.SpreadsheetmlXmlStrictRelationshipns };
private List<SheetRecord> _sheetRecords;
internal IDictionary<int, string> _sharedStrings;
private MergeCells _mergeCells;
private ExcelOpenXmlStyles _style;
internal readonly ExcelOpenXmlZip _archive;
private readonly OpenXmlConfiguration _config;
Expand Down Expand Up @@ -140,7 +139,6 @@ public IAsyncEnumerable<IDictionary<string, object>> QueryRangeAsync(bool useHea
return QueryImplAsync<T>(QueryRangeAsync(false, sheetName, startRowIndex, startColumnIndex, endRowIndex, endColumnIndex, cancellationToken), ReferenceHelper.ConvertXyToCell(startColumnIndex, startRowIndex), hasHeader, _config, cancellationToken);
}


[Zomp.SyncMethodGenerator.CreateSyncVersion]
internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeAsync(bool useHeaderRow, string sheetName, int startRowIndex, int startColumnIndex, int? endRowIndex, int? endColumnIndex, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
Expand All @@ -159,15 +157,12 @@ internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeA
// TODO: need to optimize performance
// Q. Why is the stream opened three times instead of merging everything into a single open/read? A. It cannot be merged: a zip stream does not allow resetting Position to 0.

var mergeCellsContext = new MergeCellsContext { };

var mergeCellsContext = new MergeCellsContext();
if (_config.FillMergedCells && !await TryGetMergeCellsAsync(sheetEntry, mergeCellsContext, cancellationToken).ConfigureAwait(false))
{
yield break;
}

_mergeCells = mergeCellsContext.MergeCells;

var maxRowColumnIndexResult = await TryGetMaxRowColumnIndexAsync(sheetEntry, cancellationToken).ConfigureAwait(false);
if (!maxRowColumnIndexResult.IsSuccess)
{
Expand Down Expand Up @@ -223,89 +218,16 @@ internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeA
break;
}

// fill empty rows
if (!_config.IgnoreEmptyRows)
{
var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex;
if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex)
{
for (int i = expectedRowIndex; i < rowIndex; i++)
{
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
}
}
}

// row -> c, must after `if (nextRowIndex < rowIndex)` condition code, eg. The first empty row has no xml element,and the second row xml element is <row r="2"/>
if (!await XmlReaderHelper.ReadFirstContentAsync(reader, cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows)
await foreach (var row in QueryRowAsync(reader, isFirstRow, startRowIndex, nextRowIndex, rowIndex, startColumnIndex, endColumnIndex, maxColumnIndex, withoutCR, useHeaderRow, headRows, mergeCellsContext.MergeCells, cancellationToken).ConfigureAwait(false))
{
//Fill in case of self closed empty row tag eg. <row r="1"/>
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
continue;
}

#region Set Cells

var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
var columnIndex = withoutCR ? -1 : 0;
while (!reader.EOF)
{
if (XmlReaderHelper.IsStartElement(reader, "c", _ns))
if (isFirstRow)
{
var aS = reader.GetAttribute("s");
var aR = reader.GetAttribute("r");
var aT = reader.GetAttribute("t");
var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCR,
startColumnIndex, aR, aT, cancellationToken).ConfigureAwait(false);

var cellValue = cellAndColumn.CellValue;
columnIndex = cellAndColumn.ColumnIndex;

if (_config.FillMergedCells)
{
if (_mergeCells.MergesValues.ContainsKey(aR))
{
_mergeCells.MergesValues[aR] = cellValue;
}
else if (_mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
{
_mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
}
}

if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value))
isFirstRow = false; // for startcell logic
if (useHeaderRow)
continue;

if (!string.IsNullOrEmpty(aS)) // if c with s meaning is custom style need to check type by xl/style.xml
{
int xfIndex = -1;
if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture,
out var styleIndex))
xfIndex = styleIndex;

// only when have s attribute then load styles xml data
if (_style == null)
_style = new ExcelOpenXmlStyles(_archive);

cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
}

SetCellsValueAndHeaders(cellValue, useHeaderRow, ref headRows, ref isFirstRow, ref cell, columnIndex);
}
else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false))
break;
yield return row;
}

#endregion

if (isFirstRow)
{
isFirstRow = false; // for startcell logic
if (useHeaderRow)
continue;
}

yield return cell;
}
else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false))
{
Expand All @@ -321,6 +243,94 @@ internal async IAsyncEnumerable<IDictionary<string, object>> InternalQueryRangeA
}
}

/// <summary>
/// Produces the result rows for one worksheet row: optional filler rows for the gap in front of
/// <paramref name="rowIndex"/>, followed by the row built from its <c>c</c> (cell) elements.
/// </summary>
/// <param name="reader">XML reader for the sheet; it is advanced through the current row's content.</param>
/// <param name="isFirstRow">Whether this is the first row handled by the caller; selects the expected row index and is forwarded to <c>SetCellsValueAndHeaders</c>.</param>
/// <param name="startRowIndex">First row index of the queried range.</param>
/// <param name="nextRowIndex">Row index expected next when this is not the first row.</param>
/// <param name="rowIndex">Index of the row currently being read.</param>
/// <param name="startColumnIndex">First column index of the queried range; cells before it are skipped.</param>
/// <param name="endColumnIndex">Last column index of the queried range, or <c>null</c> for no upper bound.</param>
/// <param name="maxColumnIndex">Maximum column index, used to shape empty rows.</param>
/// <param name="withoutCR">Forwarded to the cell reader; when true the column counter starts at -1 instead of 0.</param>
/// <param name="useHeaderRow">Whether rows are keyed by header names from <paramref name="headRows"/>.</param>
/// <param name="headRows">Header names by column index.</param>
/// <param name="mergeCells">Merge lookup tables; only read when <c>_config.FillMergedCells</c> is set.</param>
/// <param name="cancellationToken">Token forwarded to the asynchronous reader helpers.</param>
/// <returns>Zero or more filler rows followed by the row itself (or a single empty row for a row without content).</returns>
[Zomp.SyncMethodGenerator.CreateSyncVersion]
private async IAsyncEnumerable<IDictionary<string, object>> QueryRowAsync(
    XmlReader reader,
    bool isFirstRow,
    int startRowIndex,
    int nextRowIndex,
    int rowIndex,
    int startColumnIndex,
    int? endColumnIndex,
    int maxColumnIndex,
    bool withoutCR,
    bool useHeaderRow,
    Dictionary<int, string> headRows,
    MergeCells mergeCells,
    [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
    // Fill empty rows: emit one blank row for every index between the expected row and the current one.
    if (!_config.IgnoreEmptyRows)
    {
        var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex;
        if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex)
        {
            for (int i = expectedRowIndex; i < rowIndex; i++)
            {
                yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
            }
        }
    }

    // row -> c. This must run after the gap filling above, e.g. when the first empty row has no XML element
    // and the second row's element is <row r="2"/>.
    if (!await XmlReaderHelper.ReadFirstContentAsync(reader, cancellationToken).ConfigureAwait(false) && !_config.IgnoreEmptyRows)
    {
        // Fill in the case of a self-closed empty row tag, e.g. <row r="1"/>.
        yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
        yield break;
    }

    var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
    var columnIndex = withoutCR ? -1 : 0;
    while (!reader.EOF)
    {
        if (XmlReaderHelper.IsStartElement(reader, "c", _ns))
        {
            // Attributes must be captured before the cell reader advances past the element.
            var aS = reader.GetAttribute("s");
            var aR = reader.GetAttribute("r");
            var aT = reader.GetAttribute("t");
            var cellAndColumn = await ReadCellAndSetColumnIndexAsync(reader, columnIndex, withoutCR, startColumnIndex, aR, aT, cancellationToken).ConfigureAwait(false);

            var cellValue = cellAndColumn.CellValue;
            columnIndex = cellAndColumn.ColumnIndex;

            // GetAttribute returns null when the cell carries no "r" attribute. Such a cell has no
            // reference to look up in the merge tables, and a null key would make the lookups throw
            // (assuming they are dictionary-backed), so merge filling is skipped for it.
            if (_config.FillMergedCells && aR != null)
            {
                if (mergeCells.MergesValues.ContainsKey(aR))
                {
                    // This cell's reference is a key of the value table: record its value.
                    mergeCells.MergesValues[aR] = cellValue;
                }
                else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
                {
                    // This cell maps to another reference: take the value recorded for that reference.
                    mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
                }
            }

            // Cells outside the requested column range are read (to keep the column counter and merge
            // values up to date) but not stored.
            if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value))
            {
                continue;
            }

            if (!string.IsNullOrEmpty(aS)) // a "c" element with "s" has a custom style; its type must be checked against xl/style.xml
            {
                int xfIndex = -1;
                if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture, out var styleIndex))
                {
                    xfIndex = styleIndex;
                }

                // Load the styles XML data only once a cell actually has an "s" attribute.
                if (_style == null)
                {
                    _style = new ExcelOpenXmlStyles(_archive);
                }

                cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
            }

            SetCellsValueAndHeaders(cellValue, useHeaderRow, headRows, isFirstRow, cell, columnIndex);
        }
        else if (!await XmlReaderHelper.SkipContentAsync(reader, cancellationToken).ConfigureAwait(false))
        {
            break;
        }
    }

    yield return cell;
}

[Zomp.SyncMethodGenerator.CreateSyncVersion]
public static async IAsyncEnumerable<T> QueryImplAsync<T>(IAsyncEnumerable<IDictionary<string, object>> values, string startCell, bool hasHeader, Configuration configuration, [EnumeratorCancellation] CancellationToken cancellationToken = default) where T : class, new()
{
Expand Down Expand Up @@ -434,7 +444,7 @@ private static IDictionary<string, object> GetCell(bool useHeaderRow, int maxCol
return useHeaderRow ? CustomPropertyHelper.GetEmptyExpandoObject(headRows) : CustomPropertyHelper.GetEmptyExpandoObject(maxColumnIndex, startColumnIndex);
}

private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, ref Dictionary<int, string> headRows, ref bool isFirstRow, ref IDictionary<string, object> cell, int columnIndex)
private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, Dictionary<int, string> headRows, bool isFirstRow, IDictionary<string, object> cell, int columnIndex)
{
if (!useHeaderRow)
{
Expand Down Expand Up @@ -1093,7 +1103,7 @@ internal static async Task<bool> TryGetMergeCellsAsync(ZipArchiveEntry sheetEntr
#else
true
#endif
);
);
var mergeCells = new MergeCells();
using (var sheetStream = sheetEntry.Open())
using (XmlReader reader = XmlReader.Create(sheetStream, xmlSettings))
Expand Down
Loading
Loading