Skip to content

Commit ac5b33f

Browse files
feat(parser): ✨ Refactor HTML table parsing for improved category and severity handling
* Enhanced handling of `data-category` and `data-severity` attributes.
* Simplified header generation logic for tables without explicit headers.
* Improved error handling and code readability in `IsRunningAsRoot` method.
1 parent 702c40a commit ac5b33f

File tree

2 files changed

+133
-139
lines changed

2 files changed

+133
-139
lines changed

Sources/HtmlTinkerX/HtmlParserFromTable.cs

Lines changed: 129 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -247,21 +247,25 @@ internal static (HtmlTableMetadata Metadata, IElement[] Rows, int StartIndex) Re
247247
dict[string.IsNullOrEmpty(header) ? i.ToString() : header] = rowValues[i];
248248
}
249249
// Override Category/Severity from row-level data attributes when present.
250-
if (!string.IsNullOrEmpty(row.GetAttribute("data-category"))) {
251-
string key = row.GetAttribute("data-category")!;
252-
string val = key;
253-
if (dataValueLookup != null && dataValueLookup.TryGetValue(key, out var label)) {
254-
val = label;
250+
if (categoryIndex >= 0) {
251+
string key = (row.GetAttribute("data-category") ?? string.Empty).Trim();
252+
if (key.Length > 0) {
253+
string val = key;
254+
if (dataValueLookup != null && dataValueLookup.TryGetValue(key, out var label)) {
255+
val = label;
256+
}
257+
dict["Category"] = val;
255258
}
256-
dict["Category"] = val;
257259
}
258-
if (!string.IsNullOrEmpty(row.GetAttribute("data-severity"))) {
259-
string key = row.GetAttribute("data-severity")!;
260-
string val = key;
261-
if (dataValueLookup != null && dataValueLookup.TryGetValue(key, out var label)) {
262-
val = label;
260+
if (severityIndex >= 0) {
261+
string key = (row.GetAttribute("data-severity") ?? string.Empty).Trim();
262+
if (key.Length > 0) {
263+
string val = key;
264+
if (dataValueLookup != null && dataValueLookup.TryGetValue(key, out var label)) {
265+
val = label;
266+
}
267+
dict["Severity"] = val;
263268
}
264-
dict["Severity"] = val;
265269
}
266270
// Second chance: fill Category/Severity from data-* even if a non-empty placeholder was present.
267271
if (dict.TryGetValue("Category", out var catVal) && string.IsNullOrWhiteSpace(catVal)) {
@@ -338,45 +342,46 @@ internal static (HtmlTableMetadata Metadata, IElement[] Rows, int StartIndex) Re
338342
headers.Add(header);
339343
}
340344
}
341-
} else {
342-
for (int i = 0; i < headerCells.Length; i++) {
343-
headers.Add($"Column{i + 1}");
344-
}
345-
}
346-
345+
} else {
346+
int columnCount = SumColSpans(headerCells);
347+
for (int i = 0; i < columnCount; i++) {
348+
headers.Add($"Column{i + 1}");
349+
}
350+
}
351+
347352
int startIndex = hasHeader ? headerRowIndex + 1 : 0;
348353
List<Dictionary<string, string?>> tableRows = new();
349354
Dictionary<int, (string? Value, int Remaining)> rowSpans = new();
350355
int categoryIndex = headers.FindIndex(h => h.Equals("Category", StringComparison.OrdinalIgnoreCase));
351356
int severityIndex = headers.FindIndex(h => h.Equals("Severity", StringComparison.OrdinalIgnoreCase));
352357
foreach (var row in rows.Skip(startIndex)) {
353-
if (row == null) {
354-
continue;
355-
}
356-
var cells = row.QuerySelectorAll("th,td");
357-
string?[] rowValues = new string?[headers.Count];
358-
int col = 0;
359-
int cellIndex = 0;
360-
while (col < headers.Count) {
361-
if (rowSpans.TryGetValue(col, out var span)) {
362-
rowValues[col] = span.Value;
363-
if (--span.Remaining == 0) {
364-
rowSpans.Remove(col);
365-
} else {
366-
rowSpans[col] = span;
367-
}
368-
col++;
369-
continue;
370-
}
371-
372-
if (cellIndex < cells.Length) {
358+
if (row == null) {
359+
continue;
360+
}
361+
var cells = row.QuerySelectorAll("th,td");
362+
string?[] rowValues = new string?[headers.Count];
363+
int col = 0;
364+
int cellIndex = 0;
365+
while (col < headers.Count) {
366+
if (rowSpans.TryGetValue(col, out var span)) {
367+
rowValues[col] = span.Value;
368+
if (--span.Remaining == 0) {
369+
rowSpans.Remove(col);
370+
} else {
371+
rowSpans[col] = span;
372+
}
373+
col++;
374+
continue;
375+
}
376+
377+
if (cellIndex < cells.Length) {
373378
var cell = cells[cellIndex++];
374379
string value = FormatCellText(cell, HtmlCellTextFormat.Compact);
375-
if (replaceContent != null) {
376-
foreach (var kv in replaceContent) {
380+
if (replaceContent != null) {
381+
foreach (var kv in replaceContent) {
377382
value = ReplaceCaseInsensitive(value, kv.Key, kv.Value);
378-
}
379-
}
383+
}
384+
}
380385
int colspan = 1;
381386
int rowspan = 1;
382387
if (int.TryParse(cell.GetAttribute("colspan"), NumberStyles.Integer, CultureInfo.InvariantCulture, out int cs2)) {
@@ -385,61 +390,61 @@ internal static (HtmlTableMetadata Metadata, IElement[] Rows, int StartIndex) Re
385390
if (int.TryParse(cell.GetAttribute("rowspan"), NumberStyles.Integer, CultureInfo.InvariantCulture, out int rs2)) {
386391
rowspan = rs2;
387392
}
388-
for (int c = 0; c < colspan && col < headers.Count; c++, col++) {
389-
rowValues[col] = value;
390-
if (rowspan > 1) {
391-
rowSpans[col] = (value, rowspan - 1);
392-
}
393-
}
394-
} else {
395-
if (allProperties) {
396-
rowValues[col] = null;
397-
}
398-
col++;
399-
}
400-
}
401-
402-
Dictionary<string, string?> dict = new();
403-
for (int i = 0; i < headers.Count; i++) {
404-
string header = headers[i];
405-
dict[string.IsNullOrEmpty(header) ? i.ToString() : header] = rowValues[i];
406-
}
407-
if (categoryIndex >= 0) {
408-
string key = (row.GetAttribute("data-category") ?? string.Empty).Trim();
409-
if (key.Length > 0) {
410-
string val = key;
411-
if (dataValueLookup != null && dataValueLookup.TryGetValue(key, out var label)) {
412-
val = label;
393+
for (int c = 0; c < colspan && col < headers.Count; c++, col++) {
394+
rowValues[col] = value;
395+
if (rowspan > 1) {
396+
rowSpans[col] = (value, rowspan - 1);
397+
}
398+
}
399+
} else {
400+
if (allProperties) {
401+
rowValues[col] = null;
402+
}
403+
col++;
413404
}
414-
dict["Category"] = val;
415405
}
416-
}
417-
if (severityIndex >= 0) {
418-
string key = (row.GetAttribute("data-severity") ?? string.Empty).Trim();
419-
if (key.Length > 0) {
420-
string val = key;
421-
if (dataValueLookup != null && dataValueLookup.TryGetValue(key, out var label)) {
422-
val = label;
406+
407+
Dictionary<string, string?> dict = new();
408+
for (int i = 0; i < headers.Count; i++) {
409+
string header = headers[i];
410+
dict[string.IsNullOrEmpty(header) ? i.ToString() : header] = rowValues[i];
411+
}
412+
if (categoryIndex >= 0) {
413+
string key = (row.GetAttribute("data-category") ?? string.Empty).Trim();
414+
if (key.Length > 0) {
415+
string val = key;
416+
if (dataValueLookup.TryGetValue(key, out var label)) {
417+
val = label;
418+
}
419+
dict["Category"] = val;
423420
}
424-
dict["Severity"] = val;
421+
}
422+
if (severityIndex >= 0) {
423+
string key = (row.GetAttribute("data-severity") ?? string.Empty).Trim();
424+
if (key.Length > 0) {
425+
string val = key;
426+
if (dataValueLookup.TryGetValue(key, out var label)) {
427+
val = label;
428+
}
429+
dict["Severity"] = val;
430+
}
431+
}
432+
if (dict.TryGetValue("Category", out var catVal) && string.IsNullOrWhiteSpace(catVal)) {
433+
dict["Category"] = FillFromDataAttributes("Category", catVal, row, dataValueLookup);
434+
}
435+
if (dict.TryGetValue("Severity", out var sevVal) && string.IsNullOrWhiteSpace(sevVal)) {
436+
dict["Severity"] = FillFromDataAttributes("Severity", sevVal, row, dataValueLookup);
437+
}
438+
if (dict.Count > 0) {
439+
tableRows.Add(dict);
425440
}
426441
}
427-
if (dict.TryGetValue("Category", out var catVal) && string.IsNullOrWhiteSpace(catVal)) {
428-
dict["Category"] = FillFromDataAttributes("Category", catVal, row, dataValueLookup);
429-
}
430-
if (dict.TryGetValue("Severity", out var sevVal) && string.IsNullOrWhiteSpace(sevVal)) {
431-
dict["Severity"] = FillFromDataAttributes("Severity", sevVal, row, dataValueLookup);
432-
}
433-
if (dict.Count > 0) {
434-
tableRows.Add(dict);
442+
443+
if (tableRows.Count > 0) {
444+
result.Add(tableRows);
435445
}
436446
}
437447

438-
if (tableRows.Count > 0) {
439-
result.Add(tableRows);
440-
}
441-
}
442-
443448
return result;
444449
}
445450

@@ -802,13 +807,11 @@ public static List<HtmlTableResult> ParseTablesWithHtmlAgilityPackDetailed(
802807
if (rows == null || rows.Count == 0) {
803808
continue;
804809
}
805-
var dataValueLookup = BuildDataValueLookup(table);
806-
807810
if (reverseTable) {
808811
Dictionary<string, string?> obj = new();
809-
int index = 0;
810-
foreach (var row in rows) {
811-
if (row == null) {
812+
int index = 0;
813+
foreach (var row in rows) {
814+
if (row == null) {
812815
continue;
813816
}
814817
var cells = row.SelectNodes("th|td");
@@ -835,14 +838,16 @@ public static List<HtmlTableResult> ParseTablesWithHtmlAgilityPackDetailed(
835838

836839
if (obj.Count > 0) {
837840
result.Add(new List<Dictionary<string, string?>> { obj });
838-
}
839-
continue;
840-
}
841-
842-
int headerRowIndex = 0;
843-
bool hasHeader = false;
844-
for (int i = 0; i < rows.Count; i++) {
845-
if (rows[i].SelectNodes("th")?.Count > 0) {
841+
}
842+
continue;
843+
}
844+
845+
var dataValueLookup = BuildDataValueLookup(table);
846+
847+
int headerRowIndex = 0;
848+
bool hasHeader = false;
849+
for (int i = 0; i < rows.Count; i++) {
850+
if (rows[i].SelectNodes("th")?.Count > 0) {
846851
headerRowIndex = i;
847852
hasHeader = true;
848853
break;
@@ -871,13 +876,14 @@ public static List<HtmlTableResult> ParseTablesWithHtmlAgilityPackDetailed(
871876
}
872877
for (int c = 0; c < colspan; c++) {
873878
headers.Add(header);
874-
}
875-
}
876-
} else {
877-
for (int i = 0; i < headerCells.Count; i++) {
878-
headers.Add($"Column{i + 1}");
879-
}
880-
}
879+
}
880+
}
881+
} else {
882+
int columnCount = SumColSpans(headerCells);
883+
for (int i = 0; i < columnCount; i++) {
884+
headers.Add($"Column{i + 1}");
885+
}
886+
}
881887

882888
int startIndex = hasHeader ? headerRowIndex + 1 : 0;
883889
List<Dictionary<string, string?>> tableRows = new();
@@ -921,15 +927,15 @@ public static List<HtmlTableResult> ParseTablesWithHtmlAgilityPackDetailed(
921927
if (int.TryParse(cell.GetAttributeValue("rowspan", "1"), NumberStyles.Integer, CultureInfo.InvariantCulture, out int rs2)) {
922928
rowspan = rs2;
923929
}
924-
for (int c = 0; c < colspan && col < headers.Count; c++, col++) {
925-
rowValues[col] = value;
926-
if (rowspan > 1) {
927-
rowSpans[col] = (value, rowspan - 1);
928-
}
929-
}
930-
} else {
931-
if (allProperties) {
932-
rowValues[col] = null;
930+
for (int c = 0; c < colspan && col < headers.Count; c++, col++) {
931+
rowValues[col] = FillFromDataAttributes(headers[col], value, row, dataValueLookup);
932+
if (rowspan > 1) {
933+
rowSpans[col] = (rowValues[col], rowspan - 1);
934+
}
935+
}
936+
} else {
937+
if (allProperties) {
938+
rowValues[col] = null;
933939
}
934940
col++;
935941
}
@@ -1123,13 +1129,9 @@ private static string CleanupHeader(string raw) {
11231129
private static IDictionary<string, string> BuildDataValueLookup(IElement table) {
11241130
var dict = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
11251131
foreach (var li in table.QuerySelectorAll("li[data-value]")) {
1126-
string key = li.GetAttribute("data-value") ?? string.Empty;
1127-
key = key.Trim();
1128-
if (key.Length == 0) {
1129-
continue;
1130-
}
11311132
string text = li.TextContent.Trim();
1132-
if (!dict.ContainsKey(key)) {
1133+
string key = (li.GetAttribute("data-value") ?? string.Empty).Trim();
1134+
if (key.Length > 0 && !dict.ContainsKey(key)) {
11331135
dict[key] = text;
11341136
}
11351137
}
@@ -1141,12 +1143,10 @@ private static IDictionary<string, string> BuildDataValueLookup(HtmlNode table)
11411143
var items = table.SelectNodes(".//li[@data-value]");
11421144
if (items != null) {
11431145
foreach (var li in items) {
1144-
string key = li.GetAttributeValue("data-value", string.Empty);
1146+
string key = li.GetAttributeValue("data-value", string.Empty).Trim();
11451147
string text = HtmlEntity.DeEntitize(li.InnerText ?? string.Empty).Trim();
1146-
if (!string.IsNullOrEmpty(key)) {
1147-
if (!dict.ContainsKey(key)) {
1148-
dict[key] = text;
1149-
}
1148+
if (key.Length > 0 && !dict.ContainsKey(key)) {
1149+
dict[key] = text;
11501150
}
11511151
}
11521152
}

Sources/HtmlTinkerX/Playwright/HtmlBrowser.Installer.cs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -492,20 +492,12 @@ private static bool ShouldUsePlaywrightWithDepsOnLinux() {
492492
return null;
493493
}
494494

495-
[DllImport("libc")]
496-
private static extern uint geteuid();
497-
498495
private static bool IsRunningAsRoot() {
499496
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) {
500497
return false;
501498
}
502499

503-
try {
504-
return geteuid() == 0;
505-
} catch {
506-
// ignore
507-
}
508-
500+
// Prefer /proc/self/status (fully managed) to avoid platform-specific P/Invoke.
509501
try {
510502
const string procStatus = "/proc/self/status";
511503
if (File.Exists(procStatus)) {
@@ -521,7 +513,9 @@ private static bool IsRunningAsRoot() {
521513
break;
522514
}
523515
}
524-
} catch {
516+
} catch (IOException) {
517+
// ignore
518+
} catch (UnauthorizedAccessException) {
525519
// ignore
526520
}
527521

0 commit comments

Comments
 (0)