feat: implement MarkdownTableParser for parsing test steps from markdown tables

thomasduft · thomasduft · commit 46c7ec697646 · 2025-08-01T16:48:41.000Z
diff --git a/src/testr.Cli/Domain/MarkdownTableParser.cs b/src/testr.Cli/Domain/MarkdownTableParser.cs
@@ -0,0 +1,262 @@
+using System.Text.RegularExpressions;
+
+namespace tomware.TestR;
+
+/// <summary>
+/// A lightweight markdown table parser specifically designed for parsing test steps
+/// from markdown tables in the format used by TestR test cases.
+/// </summary>
+internal class MarkdownTableParser
+{
+  private readonly string _content;
+
+  /// <summary>Creates a parser over the given markdown document text.</summary>
+  /// <param name="content">Markdown text that is later searched for the steps table.</param>
+  public MarkdownTableParser(string content)
+    => _content = content;
+
+  /// <summary>
+  /// Reads the steps table enclosed by the STEPS:BEGIN / STEPS:END comments and converts
+  /// every parsable data row into a <see cref="TestStep"/>.
+  /// Rows that fail to parse are skipped.
+  /// The table is expected to look like:
+  /// | Step ID | Description | Test Data | Expected Result | Actual Result |
+  /// | -------:| ----------- | --------- | --------------- | ------------- |
+  /// | 1       | step desc   | test data | expected result | actual result |
+  /// </summary>
+  /// <returns>
+  /// The parsed steps ordered by their id, or an empty collection when no usable steps
+  /// section is found.
+  /// </returns>
+  public IEnumerable<TestStep> ParseTestSteps()
+  {
+    var section = ExtractStepsSection();
+    if (string.IsNullOrEmpty(section))
+    {
+      // No usable steps section (markers missing or nothing between them):
+      // there is nothing to parse.
+      return new List<TestStep>();
+    }
+
+    // Materialize the parsed rows up front so that every enumeration of the result sees
+    // the same TestStep instances; only the sorting itself is deferred.
+    var parsedSteps = ExtractTableRows(section)
+      .Select(row => ParseTableRow(row))
+      .OfType<TestStep>()
+      .ToList();
+
+    // Steps may be listed in any order in the table; callers get them sorted by id.
+    return parsedSteps.OrderBy(step => step.Id);
+  }
+
+  /// <summary>
+  /// Returns the text between the STEPS:BEGIN and STEPS:END HTML comments (matched
+  /// case-insensitively), or an empty string when either marker is missing.
+  /// </summary>
+  private string ExtractStepsSection()
+  {
+    var beginMatch = Regex.Match(_content, @"<!--\s*STEPS:BEGIN\s*-->", RegexOptions.IgnoreCase);
+    if (!beginMatch.Success)
+    {
+      return string.Empty;
+    }
+
+    // Look for the end marker only AFTER the begin marker. Searching from the start of the
+    // document would pick up a stray STEPS:END that precedes STEPS:BEGIN and wrongly
+    // discard a section that is properly terminated further down.
+    var startIndex = beginMatch.Index + beginMatch.Length;
+    var endPattern = new Regex(@"<!--\s*STEPS:END\s*-->", RegexOptions.IgnoreCase);
+    var endMatch = endPattern.Match(_content, startIndex);
+    if (!endMatch.Success)
+    {
+      return string.Empty;
+    }
+
+    return _content.Substring(startIndex, endMatch.Index - startIndex);
+  }
+
+  /// <summary>
+  /// Collects the data rows from the steps section: every pipe-delimited line that comes
+  /// after the header row and the separator row.
+  /// </summary>
+  /// <param name="stepsSection">Text found between the steps markers.</param>
+  /// <returns>The trimmed data rows in document order.</returns>
+  private List<string> ExtractTableRows(string stepsSection)
+  {
+    var dataRows = new List<string>();
+    var headerSeen = false;
+    var separatorSeen = false;
+
+    foreach (var rawLine in stepsSection.Split('\n', StringSplitOptions.RemoveEmptyEntries))
+    {
+      var line = rawLine.Trim();
+
+      // Only lines wrapped in pipes count as table rows; this also drops blank lines
+      // and any free text inside the section.
+      var isTableRow = line.StartsWith("|") && line.EndsWith("|");
+      if (!isTableRow)
+      {
+        continue;
+      }
+
+      if (separatorSeen)
+      {
+        // Header and separator are behind us: everything that follows is data.
+        dataRows.Add(line);
+      }
+      else if (!headerSeen)
+      {
+        // The very first table row is the header; it carries no step.
+        headerSeen = true;
+      }
+      else if (IsSeparatorRow(line))
+      {
+        // The separator row ends the header part of the table.
+        separatorSeen = true;
+      }
+
+      // A non-separator row between header and separator falls through and is ignored.
+    }
+
+    return dataRows;
+  }
+
+  /// <summary>
+  /// Tells whether a table row is the delimiter row between the header and the data rows.
+  /// </summary>
+  private static bool IsSeparatorRow(string line)
+  {
+    // Once outer pipes and padding are stripped, only '-', ':', '|' and whitespace may remain.
+    const string separatorCharsOnly = @"^[\s\-:|]+$";
+    return Regex.IsMatch(line.Trim('|', ' '), separatorCharsOnly);
+  }
+
+  /// <summary>
+  /// Converts one data row into a <see cref="TestStep"/>.
+  /// </summary>
+  /// <param name="row">A pipe-delimited table row.</param>
+  /// <returns>
+  /// The populated step, or <c>null</c> when the row has fewer than four cells, its first
+  /// cell is not an integer, or an exception occurs while parsing.
+  /// </returns>
+  private TestStep? ParseTableRow(string row)
+  {
+    try
+    {
+      var cells = ParseTableCells(row);
+
+      // Step ID, Description, Test Data and Expected Result are mandatory;
+      // the fifth column (Actual Result) may be absent.
+      if (cells.Count < 4)
+      {
+        return null;
+      }
+
+      // Every cell is consumed in trimmed form.
+      string Cell(int index) => cells[index].Trim();
+
+      var step = new TestStep();
+
+      // Column 1: the step id; a non-numeric id invalidates the whole row.
+      if (!int.TryParse(Cell(0), out var id))
+      {
+        return null;
+      }
+
+      step.Id = id;
+      step.Description = UnescapeMarkdown(Cell(1));
+      step.TestData = UnescapeMarkdown(Cell(2));
+      step.ExpectedResult = UnescapeMarkdown(Cell(3));
+
+      // Column 5 (optional): the actual result.
+      if (cells.Count > 4)
+      {
+        // A check mark anywhere in the cell, or a cell that is exactly "-", counts as success.
+        var actual = Cell(4);
+        step.IsSuccess = actual.Contains("✅")
+          || actual.Equals("-", StringComparison.OrdinalIgnoreCase);
+      }
+
+      return step;
+    }
+    catch
+    {
+      // Best effort: a row that throws for any reason is treated as unparsable.
+      return null;
+    }
+  }
+
+  /// <summary>
+  /// Splits a table row into its raw (untrimmed) cell texts. The outer pipes are dropped,
+  /// an unescaped <c>|</c> separates cells, and the escapes <c>\|</c> and <c>\\</c> yield
+  /// a literal pipe or backslash inside a cell. Any other backslash is kept as-is.
+  /// </summary>
+  /// <param name="row">The table row, typically wrapped in a leading and trailing pipe.</param>
+  /// <returns>The cells in column order; always contains at least one entry.</returns>
+  private List<string> ParseTableCells(string row)
+  {
+    // Remove leading and trailing pipes (ordinal char comparison, independent of culture)
+    var content = row.Trim();
+    if (content.StartsWith('|'))
+    {
+      content = content.Substring(1);
+    }
+    if (content.EndsWith('|'))
+    {
+      content = content.Substring(0, content.Length - 1);
+    }
+
+    var cells = new List<string>();
+    // A StringBuilder avoids re-allocating the cell text for every appended character.
+    var currentCell = new System.Text.StringBuilder();
+
+    for (var i = 0; i < content.Length; i++)
+    {
+      var c = content[i];
+
+      if (c == '\\' && i + 1 < content.Length && (content[i + 1] == '|' || content[i + 1] == '\\'))
+      {
+        // Escaped pipe or backslash: emit the escaped character literally.
+        currentCell.Append(content[i + 1]);
+        i++; // Skip the escaped character
+        continue;
+      }
+
+      if (c == '|')
+      {
+        cells.Add(currentCell.ToString());
+        currentCell.Clear();
+      }
+      else
+      {
+        currentCell.Append(c);
+      }
+    }
+
+    // Whatever follows the last separator is the final cell (possibly empty).
+    cells.Add(currentCell.ToString());
+
+    return cells;
+  }
+
+  /// <summary>
+  /// Decodes the HTML entities quot, lt, gt, nbsp and amp in cell content. Each entity is
+  /// decoded exactly once; a null or empty input is returned unchanged.
+  /// </summary>
+  private static string UnescapeMarkdown(string content)
+  {
+    if (string.IsNullOrEmpty(content))
+    {
+      return content;
+    }
+
+    // "&amp;" must be decoded last: decoding it first would turn an escaped entity such as
+    // "&amp;lt;" into "&lt;" and then, wrongly, into "<" (double decoding).
+    return content
+      .Replace("&quot;", "\"")
+      .Replace("&lt;", "<")
+      .Replace("&gt;", ">")
+      .Replace("&nbsp;", " ")
+      .Replace("&amp;", "&");
+  }
+}
diff --git a/src/testr.Cli/Domain/TestCaseParser.cs b/src/testr.Cli/Domain/TestCaseParser.cs
@@ -1,9 +1,5 @@
 using System.Text.RegularExpressions;
 
-using HtmlAgilityPack;
-
-using Markdig;
-
 namespace tomware.TestR;
 
 internal class TestCaseParser
@@ -47,59 +43,8 @@ internal async Task<TestCase> ToTestCaseAsync(CancellationToken cancellationToke
 
   private IEnumerable<TestStep> GetTestSteps(string markdownContent)
   {
-    var testSteps = new List<TestStep>();
-
-    var pipeline = new MarkdownPipelineBuilder()
-        .UseAdvancedExtensions()
-        .Build();
-
-    var html = Markdown.ToHtml(markdownContent, pipeline);
-    var hap = new HtmlDocument();
-    hap.LoadHtml(html);
-
-    // Extract the table content
-    var tableNodes = hap.DocumentNode
-      .Descendants("table")
-      .ToList();
-    foreach (var tableNode in tableNodes)
-    {
-      // Extract rows from the table
-      var rowNodes = tableNode.Descendants("tr").ToList();
-
-      foreach (var rowNode in rowNodes.Skip(1))
-      {
-        // Extract cells from the row
-        var testStep = new TestStep();
-        var cellNodes = rowNode.Descendants("td").ToList();
-
-        // List each cell's content
-        for (var i = 0; i < cellNodes.Count; i++)
-        {
-          var cellNode = cellNodes[i];
-          var cellContent = cellNode.InnerText.Trim();
-
-          switch (i)
-          {
-            case 0:
-              testStep.Id = int.Parse(cellContent);
-              break;
-            case 1:
-              testStep.Description = SanitizeWebString(cellContent);
-              break;
-            case 2:
-              testStep.TestData = SanitizeWebString(cellContent);
-              break;
-            case 3:
-              testStep.ExpectedResult = SanitizeWebString(cellContent);
-              break;
-          }
-        }
-
-        testSteps.Add(testStep);
-      }
-    }
-
-    return testSteps.OrderBy(ts => ts.Id);
+    var parser = new MarkdownTableParser(markdownContent);
+    return parser.ParseTestSteps();
   }
 
   private (string TestCaseId, string TestCaseTitle) GetTestCaseIdAndTitle(string[] lines)
@@ -127,12 +72,6 @@ private IEnumerable<TestStep> GetTestSteps(string markdownContent)
     return splittedItems[1].Trim();
   }
 
-  private string SanitizeWebString(string input)
-  {
-    return input
-      .Replace("&quot;", "\"");
-  }
-
   private string? GetLinkedFile(string file, string? link)
   {
     if (string.IsNullOrWhiteSpace(link)) return null;
diff --git a/src/testr.Cli/testr.Cli.csproj b/src/testr.Cli/testr.Cli.csproj
@@ -38,8 +38,6 @@
   <ItemGroup>
     <PackageReference Include="CliWrap" Version="3.9.0" />
     <PackageReference Include="Fluid.Core" Version="2.25.0" />
-    <PackageReference Include="HtmlAgilityPack" Version="1.12.2" />
-    <PackageReference Include="Markdig" Version="0.41.3" />
     <PackageReference Include="McMaster.Extensions.CommandLineUtils" Version="4.1.1" />
     <PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="9.0.7" />
     <PackageReference Include="Microsoft.Playwright" Version="1.54.0" />
diff --git a/src/testr.Tests/MarkdownTableParserTests.cs b/src/testr.Tests/MarkdownTableParserTests.cs