Skip to content

Commit d9e2490

Browse files
authored
Merge pull request #816 from mini-software/v1.x-maintenance-fix-809
[maintenance] fix: multiple empty rows in query result due to empty first row
2 parents 1d06e96 + 580160f commit d9e2490

File tree

3 files changed

+151
-109
lines changed

3 files changed

+151
-109
lines changed

samples/xlsx/TestIssue809.xlsx

8.4 KB
Binary file not shown.

src/MiniExcel/OpenXml/ExcelOpenXmlSheetReader.cs

Lines changed: 111 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
using System.IO;
99
using System.IO.Compression;
1010
using System.Linq;
11+
using System.Runtime.CompilerServices;
1112
using System.Threading;
1213
using System.Threading.Tasks;
1314
using System.Xml;
@@ -191,86 +192,16 @@ internal IEnumerable<IDictionary<string, object>> InternalQueryRange(bool useHea
191192
break;
192193
}
193194

194-
// fill empty rows
195-
if (!_config.IgnoreEmptyRows)
195+
foreach (var row in QueryRow(reader, isFirstRow, startRowIndex, nextRowIndex, rowIndex, startColumnIndex, endColumnIndex, maxColumnIndex, withoutCR, useHeaderRow, headRows, _mergeCells))
196196
{
197-
var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex;
198-
if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex)
197+
if (isFirstRow)
199198
{
200-
for (int i = expectedRowIndex; i < rowIndex; i++)
201-
{
202-
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
203-
}
204-
}
205-
}
206-
207-
// row -> c, must after `if (nextRowIndex < rowIndex)` condition code, eg. The first empty row has no xml element,and the second row xml element is <row r="2"/>
208-
if (!XmlReaderHelper.ReadFirstContent(reader) && !_config.IgnoreEmptyRows)
209-
{
210-
//Fill in case of self closed empty row tag eg. <row r="1"/>
211-
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
212-
continue;
213-
}
214-
215-
#region Set Cells
216-
217-
var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
218-
var columnIndex = withoutCR ? -1 : 0;
219-
while (!reader.EOF)
220-
{
221-
if (XmlReaderHelper.IsStartElement(reader, "c", _ns))
222-
{
223-
var aS = reader.GetAttribute("s");
224-
var aR = reader.GetAttribute("r");
225-
var aT = reader.GetAttribute("t");
226-
var cellValue = ReadCellAndSetColumnIndex(reader, ref columnIndex, withoutCR,
227-
startColumnIndex, aR, aT);
228-
229-
if (_config.FillMergedCells)
230-
{
231-
if (_mergeCells.MergesValues.ContainsKey(aR))
232-
{
233-
_mergeCells.MergesValues[aR] = cellValue;
234-
}
235-
else if (_mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
236-
{
237-
_mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
238-
}
239-
}
240-
241-
if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value))
199+
isFirstRow = false; // for startcell logic
200+
if (useHeaderRow)
242201
continue;
243-
244-
if (!string.IsNullOrEmpty(aS)) // if c with s meaning is custom style need to check type by xl/style.xml
245-
{
246-
int xfIndex = -1;
247-
if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture,
248-
out var styleIndex))
249-
xfIndex = styleIndex;
250-
251-
// only when have s attribute then load styles xml data
252-
if (_style == null)
253-
_style = new ExcelOpenXmlStyles(_archive);
254-
255-
cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
256-
}
257-
258-
SetCellsValueAndHeaders(cellValue, useHeaderRow, ref headRows, ref isFirstRow, ref cell, columnIndex);
259202
}
260-
else if (!XmlReaderHelper.SkipContent(reader))
261-
break;
262-
}
263-
264-
#endregion
265-
266-
if (isFirstRow)
267-
{
268-
isFirstRow = false; // for startcell logic
269-
if (useHeaderRow)
270-
continue;
203+
yield return row;
271204
}
272-
273-
yield return cell;
274205
}
275206
else if (!XmlReaderHelper.SkipContent(reader))
276207
{
@@ -286,6 +217,92 @@ internal IEnumerable<IDictionary<string, object>> InternalQueryRange(bool useHea
286217
}
287218
}
288219

220+
/// <summary>
/// Iterator that yields the result rows contributed by a single worksheet row:
/// first any filler rows for the gap before <paramref name="rowIndex"/> (only when
/// <c>_config.IgnoreEmptyRows</c> is false), then either one empty row (when the row
/// has no content) or the row populated from its <c>c</c> (cell) elements.
/// </summary>
/// <remarks>
/// This is a <c>yield</c>-based iterator, so the body only runs while the result is
/// being enumerated, and it advances the shared <paramref name="reader"/> as it goes.
/// It yields every row, including the first one; the caller decides whether the first
/// row is a header to be skipped.
/// </remarks>
/// <param name="reader">XML reader the row content is read from. Presumably positioned on a row start element - confirm against caller.</param>
/// <param name="isFirstRow">True while no row has been processed yet; selects <paramref name="startRowIndex"/> as the expected row index. Only read here, never modified.</param>
/// <param name="startRowIndex">First row index of the queried range; filler rows are never produced for an expected index before it.</param>
/// <param name="nextRowIndex">Row index expected for this row when it is not the first row.</param>
/// <param name="rowIndex">Index of the row currently being read.</param>
/// <param name="startColumnIndex">Lowest column index kept in the result.</param>
/// <param name="endColumnIndex">Highest column index kept in the result, or null for no upper bound.</param>
/// <param name="maxColumnIndex">Passed to <c>GetCell</c> when building a row without headers.</param>
/// <param name="withoutCR">Selects the initial column index (-1 when true, 0 when false) and is forwarded to <c>ReadCellAndSetColumnIndex</c>.</param>
/// <param name="useHeaderRow">Forwarded to <c>GetCell</c> and <c>SetCellsValueAndHeaders</c>.</param>
/// <param name="headRows">Header map forwarded to <c>GetCell</c> and <c>SetCellsValueAndHeaders</c>.</param>
/// <param name="mergeCells">Merged-cell lookup tables, consulted and updated only when <c>_config.FillMergedCells</c> is true.</param>
/// <returns>Zero or more filler rows followed by exactly one row for the current row element.</returns>
private IEnumerable<IDictionary<string, object>> QueryRow(
221+
XmlReader reader,
222+
bool isFirstRow,
223+
int startRowIndex,
224+
int nextRowIndex,
225+
int rowIndex,
226+
int startColumnIndex,
227+
int? endColumnIndex,
228+
int maxColumnIndex,
229+
bool withoutCR,
230+
bool useHeaderRow,
231+
Dictionary<int, string> headRows,
232+
MergeCells mergeCells)
233+
{
234+
// Fill the gap of missing rows between the expected row index and the current one.
235+
if (!_config.IgnoreEmptyRows)
236+
{
237+
// For the first row the gap is measured from the start of the requested range.
238+
var expectedRowIndex = isFirstRow ? startRowIndex : nextRowIndex;
239+
if (startRowIndex <= expectedRowIndex && expectedRowIndex < rowIndex)
240+
{
241+
// One empty row per missing index in [expectedRowIndex, rowIndex).
242+
for (int i = expectedRowIndex; i < rowIndex; i++)
243+
{
244+
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
245+
}
246+
}
247+
}
248+
249+
// Descend from the row element to its cells. This must run after the gap filling above,
250+
// e.g. when the first (empty) row has no XML element at all and the second row is <row r="2"/>.
251+
// NOTE(review): when ReadFirstContent returns false and IgnoreEmptyRows is true, execution
252+
// falls through to the cell loop below instead of returning - confirm this is intended.
253+
if (!XmlReaderHelper.ReadFirstContent(reader) && !_config.IgnoreEmptyRows)
254+
{
255+
// Self-closed empty row tag, e.g. <row r="1"/>: emit one empty row and stop.
256+
yield return GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
257+
yield break;
258+
}
259+
260+
// Row container that the cell loop below fills in.
261+
var cell = GetCell(useHeaderRow, maxColumnIndex, headRows, startColumnIndex);
262+
var columnIndex = withoutCR ? -1 : 0;
263+
while (!reader.EOF)
264+
{
265+
if (XmlReaderHelper.IsStartElement(reader, "c", _ns))
266+
{
267+
// Attributes are read before ReadCellAndSetColumnIndex consumes the element.
268+
var aS = reader.GetAttribute("s");
269+
var aR = reader.GetAttribute("r");
270+
var aT = reader.GetAttribute("t");
271+
var cellAndColumn = ReadCellAndSetColumnIndex(reader, columnIndex, withoutCR, startColumnIndex, aR, aT);
272+
273+
// The helper returns the value together with the column index to continue from.
274+
var cellValue = cellAndColumn.CellValue;
275+
columnIndex = cellAndColumn.ColumnIndex;
276+
277+
// NOTE(review): aR is null when the "r" attribute is absent; presumably MergesValues and
278+
// MergesMap are dictionaries that reject null keys - confirm this path is safe in that case.
279+
if (_config.FillMergedCells)
280+
{
281+
// A key already present in MergesValues gets this cell's value recorded;
282+
// otherwise a cell mapped through MergesMap takes the value stored under its merge key.
283+
if (mergeCells.MergesValues.ContainsKey(aR))
284+
{
285+
mergeCells.MergesValues[aR] = cellValue;
286+
}
287+
else if (mergeCells.MergesMap.TryGetValue(aR, out var mergeKey))
288+
{
289+
mergeCells.MergesValues.TryGetValue(mergeKey, out cellValue);
290+
}
291+
}
292+
293+
// Cells outside the requested column range are dropped; this check runs after the
294+
// merged-cell bookkeeping above, so that bookkeeping still happens for dropped cells.
295+
if (columnIndex < startColumnIndex || (endColumnIndex.HasValue && columnIndex > endColumnIndex.Value))
296+
continue;
297+
298+
if (!string.IsNullOrEmpty(aS)) // a cell with an "s" attribute has a custom style; its type must be resolved via xl/style.xml
299+
{
300+
// xfIndex stays -1 when the style attribute is not a parsable integer.
301+
int xfIndex = -1;
302+
if (int.TryParse(aS, NumberStyles.Any, CultureInfo.InvariantCulture,
303+
out var styleIndex))
304+
xfIndex = styleIndex;
305+
306+
// Load the styles XML lazily, only once a cell actually carries an "s" attribute.
307+
if (_style == null)
308+
_style = new ExcelOpenXmlStyles(_archive);
309+
310+
cellValue = _style.ConvertValueByStyleFormat(xfIndex, cellValue);
311+
}
312+
313+
SetCellsValueAndHeaders(cellValue, useHeaderRow, headRows, isFirstRow, cell, columnIndex);
314+
}
315+
// Not a cell element: skip it, and stop the loop when SkipContent returns false.
316+
else if (!XmlReaderHelper.SkipContent(reader))
317+
break;
318+
}
319+
// The populated row is always yielded; header-row skipping is left to the caller.
320+
yield return cell;
321+
}
305+
289306
public static IEnumerable<T> QueryImpl<T>(IEnumerable<IDictionary<string, object>> values, string startCell, bool hasHeader, Configuration configuration) where T : class, new()
290307
{
291308
var type = typeof(T);
@@ -354,7 +371,7 @@ internal IEnumerable<IDictionary<string, object>> InternalQueryRange(bool useHea
354371
yield return v;
355372
}
356373
}
357-
374+
358375
private ZipArchiveEntry GetSheetEntry(string sheetName)
359376
{
360377
// if sheets count > 1 need to read xl/_rels/workbook.xml.rels
@@ -396,7 +413,7 @@ private static IDictionary<string, object> GetCell(bool useHeaderRow, int maxCol
396413
return useHeaderRow ? CustomPropertyHelper.GetEmptyExpandoObject(headRows) : CustomPropertyHelper.GetEmptyExpandoObject(maxColumnIndex, startColumnIndex);
397414
}
398415

399-
private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, ref Dictionary<int, string> headRows, ref bool isFirstRow, ref IDictionary<string, object> cell, int columnIndex)
416+
private static void SetCellsValueAndHeaders(object cellValue, bool useHeaderRow, Dictionary<int, string> headRows, bool isFirstRow, IDictionary<string, object> cell, int columnIndex)
400417
{
401418
if (!useHeaderRow)
402419
{
@@ -557,7 +574,19 @@ internal List<SheetRecord> GetWorkbookRels(ReadOnlyCollection<ZipArchiveEntry> e
557574
return sheetRecords;
558575
}
559576

560-
private object ReadCellAndSetColumnIndex(XmlReader reader, ref int columnIndex, bool withoutCR, int startColumnIndex, string aR, string aT)
577+
/// <summary>
/// Immutable pair of a cell value and a column index. It is the return type of
/// <c>ReadCellAndSetColumnIndex</c>, which takes the column index by value and
/// reports it back through this object.
/// </summary>
internal class CellAndColumn
578+
{
579+
/// <summary>The value passed to the constructor; null is passed on some return paths of <c>ReadCellAndSetColumnIndex</c>.</summary>
public object CellValue { get; }
580+
/// <summary>The column index passed to the constructor; the -1 initializer is always overwritten by it.</summary>
public int ColumnIndex { get; } = -1;
581+
582+
/// <summary>Creates a pair from the given cell value and column index.</summary>
/// <param name="cellValue">Value to store in <see cref="CellValue"/>; may be null.</param>
/// <param name="columnIndex">Value to store in <see cref="ColumnIndex"/>.</param>
public CellAndColumn(object cellValue, int columnIndex)
583+
{
584+
CellValue = cellValue;
585+
ColumnIndex = columnIndex;
586+
}
587+
}
588+
589+
private CellAndColumn ReadCellAndSetColumnIndex(XmlReader reader, int columnIndex, bool withoutCR, int startColumnIndex, string aR, string aT)
561590
{
562591
const int xfIndex = -1;
563592
int newColumnIndex;
@@ -576,17 +605,17 @@ private object ReadCellAndSetColumnIndex(XmlReader reader, ref int columnIndex,
576605
if (columnIndex < startColumnIndex)
577606
{
578607
if (!XmlReaderHelper.ReadFirstContent(reader))
579-
return null;
608+
return new CellAndColumn(null, columnIndex);
580609

581610
while (!reader.EOF)
582611
if (!XmlReaderHelper.SkipContent(reader))
583612
break;
584613

585-
return null;
614+
return new CellAndColumn(null, columnIndex);
586615
}
587616

588617
if (!XmlReaderHelper.ReadFirstContent(reader))
589-
return null;
618+
return new CellAndColumn(null, columnIndex);
590619

591620
object value = null;
592621
while (!reader.EOF)
@@ -609,7 +638,7 @@ private object ReadCellAndSetColumnIndex(XmlReader reader, ref int columnIndex,
609638
}
610639
}
611640

612-
return value;
641+
return new CellAndColumn(value, columnIndex);
613642
}
614643

615644
private void ConvertCellValue(string rawValue, string aT, int xfIndex, out object value)

0 commit comments

Comments
 (0)