Skip to content

Commit 46c7ec6

Browse files
committed
feat: implement MarkdownTableParser for parsing test steps from markdown tables
1 parent 08101e2 commit 46c7ec6

File tree

4 files changed

+501
-65
lines changed

4 files changed

+501
-65
lines changed
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
2+
3+
namespace tomware.TestR;
4+
5+
/// <summary>
/// A lightweight markdown table parser specifically designed for parsing test steps
/// from markdown tables in the format used by TestR test cases.
/// </summary>
/// <remarks>
/// Only the text between the <c>&lt;!-- STEPS:BEGIN --&gt;</c> and
/// <c>&lt;!-- STEPS:END --&gt;</c> comments is inspected. Rows that cannot be turned
/// into a test step (too few cells, non-numeric step id) are skipped silently.
/// </remarks>
internal class MarkdownTableParser
{
    /// <summary>Step ID, Description, Test Data and Expected Result are mandatory cells.</summary>
    private const int MinimumCellCount = 4;

    // CultureInvariant keeps the case-insensitive match of "BEGIN"/"END" independent of the
    // current culture (e.g. the Turkish dotted/dotless i).
    private static readonly Regex StepsBeginRegex =
        new(@"<!--\s*STEPS:BEGIN\s*-->", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    private static readonly Regex StepsEndRegex =
        new(@"<!--\s*STEPS:END\s*-->", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    private static readonly Regex SeparatorRowRegex = new(@"^[\s\-:|]+$");

    private readonly string _content;

    /// <summary>
    /// Creates a parser for the given markdown document.
    /// </summary>
    /// <param name="content">The complete markdown text of a test case.</param>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is <c>null</c>.</exception>
    public MarkdownTableParser(string content)
    {
        // Fail fast: a null document would otherwise surface as the same exception type,
        // but only later, inside the first regex match.
        ArgumentNullException.ThrowIfNull(content);
        _content = content;
    }

    /// <summary>
    /// Parses test steps from the markdown table found between the
    /// <c>&lt;!-- STEPS:BEGIN --&gt;</c> and <c>&lt;!-- STEPS:END --&gt;</c> comments.
    /// Expected table format:
    /// <code>
    /// | Step ID | Description | Test Data | Expected Result | Actual Result |
    /// | -------:| ----------- | --------- | --------------- | ------------- |
    /// | 1       | step desc   | test data | expected result | actual result |
    /// </code>
    /// </summary>
    /// <returns>
    /// The parsed test steps ordered by ascending step id; empty when the steps section
    /// is missing or contains no parsable row.
    /// </returns>
    public IEnumerable<TestStep> ParseTestSteps()
    {
        var stepsSection = ExtractStepsSection();
        if (string.IsNullOrEmpty(stepsSection))
        {
            return Array.Empty<TestStep>();
        }

        return ExtractTableRows(stepsSection)
            .Select(ParseTableRow)
            .OfType<TestStep>() // drops the rows that could not be parsed (null)
            .OrderBy(step => step.Id)
            .ToList();
    }

    /// <summary>
    /// Extracts the content between the <c>&lt;!-- STEPS:BEGIN --&gt;</c> and
    /// <c>&lt;!-- STEPS:END --&gt;</c> comments.
    /// </summary>
    /// <returns>
    /// The text between the first BEGIN marker and the first END marker that follows it,
    /// or an empty string when either marker is missing.
    /// </returns>
    private string ExtractStepsSection()
    {
        var beginMatch = StepsBeginRegex.Match(_content);
        if (!beginMatch.Success)
        {
            return string.Empty;
        }

        var startIndex = beginMatch.Index + beginMatch.Length;

        // Look for END only behind BEGIN, so that a stray END marker earlier in the
        // document cannot hide the one that actually closes the section.
        var endMatch = StepsEndRegex.Match(_content, startIndex);
        if (!endMatch.Success)
        {
            return string.Empty;
        }

        return _content.Substring(startIndex, endMatch.Index - startIndex);
    }

    /// <summary>
    /// Extracts the data rows from the steps section, excluding the header and separator rows.
    /// </summary>
    /// <param name="stepsSection">The text between the steps markers.</param>
    /// <returns>The trimmed data rows in document order.</returns>
    private static List<string> ExtractTableRows(string stepsSection)
    {
        var dataRows = new List<string>();
        var headerSeen = false;
        var separatorSeen = false;

        foreach (var rawLine in stepsSection.Split('\n'))
        {
            // Trim also removes the '\r' of Windows line endings.
            var line = rawLine.Trim();

            // Only lines that start and end with a pipe are considered table rows.
            if (!line.StartsWith('|') || !line.EndsWith('|'))
            {
                continue;
            }

            // The first table row is the header.
            if (!headerSeen)
            {
                headerSeen = true;
                continue;
            }

            // Rows are collected only once the separator row has been passed;
            // anything between header and separator is ignored.
            if (separatorSeen)
            {
                dataRows.Add(line);
            }
            else if (IsSeparatorRow(line))
            {
                separatorSeen = true;
            }
        }

        return dataRows;
    }

    /// <summary>
    /// Checks if a line is a markdown table separator row.
    /// </summary>
    /// <param name="line">A trimmed table row.</param>
    /// <returns>
    /// <c>true</c> when the row consists only of pipes, dashes, colons and whitespace
    /// and contains at least one dash.
    /// </returns>
    private static bool IsSeparatorRow(string line)
    {
        var content = line.Trim('|', ' ');

        // Requiring a dash keeps rows made only of pipes/colons/blanks from being
        // mistaken for the separator.
        return content.Contains('-') && SeparatorRowRegex.IsMatch(content);
    }

    /// <summary>
    /// Parses a single table row into a <see cref="TestStep"/>.
    /// </summary>
    /// <param name="row">A table data row.</param>
    /// <returns>
    /// The parsed step, or <c>null</c> when the row has fewer than four cells or the
    /// first cell is not an integer.
    /// </returns>
    private static TestStep? ParseTableRow(string row)
    {
        var cells = ParseTableCells(row);

        // Step ID, Description, Test Data and Expected Result are required;
        // the fifth cell (Actual Result) is optional.
        if (cells.Count < MinimumCellCount)
        {
            return null;
        }

        // Step ids are machine-readable data, so parse them culture-independently.
        if (!int.TryParse(cells[0].Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var stepId))
        {
            return null;
        }

        var testStep = new TestStep
        {
            Id = stepId,
            Description = UnescapeMarkdown(cells[1].Trim()),
            TestData = UnescapeMarkdown(cells[2].Trim()),
            ExpectedResult = UnescapeMarkdown(cells[3].Trim())
        };

        if (cells.Count > MinimumCellCount)
        {
            var actualResult = cells[4].Trim();

            // A check mark anywhere in the cell, or a cell that is exactly "-",
            // marks the step as successful.
            testStep.IsSuccess = actualResult.Contains('✅') || actualResult == "-";
        }

        return testStep;
    }

    /// <summary>
    /// Splits a table row into its cells, honoring escaped pipes (<c>\|</c>) and
    /// escaped backslashes (<c>\\</c>) inside cell content.
    /// </summary>
    /// <param name="row">A table row, with or without the outer pipes.</param>
    /// <returns>The raw (untrimmed) cell contents with the escapes resolved.</returns>
    private static List<string> ParseTableCells(string row)
    {
        var cells = new List<string>();
        var currentCell = new StringBuilder();
        var content = row.Trim();

        // A leading pipe only opens the row; it does not produce an empty first cell.
        var index = content.StartsWith('|') ? 1 : 0;
        var endsWithDelimiter = false;

        for (; index < content.Length; index++)
        {
            var c = content[index];
            endsWithDelimiter = false;

            if (c == '\\' && index + 1 < content.Length)
            {
                var next = content[index + 1];
                if (next == '|' || next == '\\')
                {
                    // Escaped character: keep it literally and skip the backslash.
                    currentCell.Append(next);
                    index++;
                    continue;
                }
            }

            if (c == '|')
            {
                cells.Add(currentCell.ToString());
                currentCell.Clear();
                endsWithDelimiter = true;
            }
            else
            {
                currentCell.Append(c);
            }
        }

        // An unescaped trailing pipe only closes the row. Deciding this here, instead of
        // stripping the last character up front, keeps an escaped "\|" at the very end
        // of the row as part of the last cell.
        if (!endsWithDelimiter)
        {
            cells.Add(currentCell.ToString());
        }

        return cells;
    }

    /// <summary>
    /// Decodes the HTML entities <c>&amp;quot;</c>, <c>&amp;lt;</c>, <c>&amp;gt;</c>,
    /// <c>&amp;nbsp;</c> and <c>&amp;amp;</c> in cell content.
    /// </summary>
    /// <param name="content">The trimmed cell text.</param>
    /// <returns>The decoded text; <c>null</c> or empty input is returned unchanged.</returns>
    private static string UnescapeMarkdown(string content)
    {
        if (string.IsNullOrEmpty(content))
        {
            return content;
        }

        // "&amp;" has to be decoded last: decoding it first would turn an escaped
        // entity such as "&amp;lt;" into "&lt;" and then wrongly into "<".
        return content
            .Replace("&quot;", "\"")
            .Replace("&lt;", "<")
            .Replace("&gt;", ">")
            .Replace("&nbsp;", " ")
            .Replace("&amp;", "&");
    }
}

src/testr.Cli/Domain/TestCaseParser.cs

Lines changed: 2 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
using System.Text.RegularExpressions;
22

3-
using HtmlAgilityPack;
4-
5-
using Markdig;
6-
73
namespace tomware.TestR;
84

95
internal class TestCaseParser
@@ -47,59 +43,8 @@ internal async Task<TestCase> ToTestCaseAsync(CancellationToken cancellationToke
4743

4844
private IEnumerable<TestStep> GetTestSteps(string markdownContent)
4945
{
50-
var testSteps = new List<TestStep>();
51-
52-
var pipeline = new MarkdownPipelineBuilder()
53-
.UseAdvancedExtensions()
54-
.Build();
55-
56-
var html = Markdown.ToHtml(markdownContent, pipeline);
57-
var hap = new HtmlDocument();
58-
hap.LoadHtml(html);
59-
60-
// Extract the table content
61-
var tableNodes = hap.DocumentNode
62-
.Descendants("table")
63-
.ToList();
64-
foreach (var tableNode in tableNodes)
65-
{
66-
// Extract rows from the table
67-
var rowNodes = tableNode.Descendants("tr").ToList();
68-
69-
foreach (var rowNode in rowNodes.Skip(1))
70-
{
71-
// Extract cells from the row
72-
var testStep = new TestStep();
73-
var cellNodes = rowNode.Descendants("td").ToList();
74-
75-
// List each cell's content
76-
for (var i = 0; i < cellNodes.Count; i++)
77-
{
78-
var cellNode = cellNodes[i];
79-
var cellContent = cellNode.InnerText.Trim();
80-
81-
switch (i)
82-
{
83-
case 0:
84-
testStep.Id = int.Parse(cellContent);
85-
break;
86-
case 1:
87-
testStep.Description = SanitizeWebString(cellContent);
88-
break;
89-
case 2:
90-
testStep.TestData = SanitizeWebString(cellContent);
91-
break;
92-
case 3:
93-
testStep.ExpectedResult = SanitizeWebString(cellContent);
94-
break;
95-
}
96-
}
97-
98-
testSteps.Add(testStep);
99-
}
100-
}
101-
102-
return testSteps.OrderBy(ts => ts.Id);
46+
var parser = new MarkdownTableParser(markdownContent);
47+
return parser.ParseTestSteps();
10348
}
10449

10550
private (string TestCaseId, string TestCaseTitle) GetTestCaseIdAndTitle(string[] lines)
@@ -127,12 +72,6 @@ private IEnumerable<TestStep> GetTestSteps(string markdownContent)
12772
return splittedItems[1].Trim();
12873
}
12974

130-
private string SanitizeWebString(string input)
131-
{
132-
return input
133-
.Replace("&quot;", "\"");
134-
}
135-
13675
private string? GetLinkedFile(string file, string? link)
13776
{
13877
if (string.IsNullOrWhiteSpace(link)) return null;

src/testr.Cli/testr.Cli.csproj

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@
3838
<ItemGroup>
3939
<PackageReference Include="CliWrap" Version="3.9.0" />
4040
<PackageReference Include="Fluid.Core" Version="2.25.0" />
41-
<PackageReference Include="HtmlAgilityPack" Version="1.12.2" />
42-
<PackageReference Include="Markdig" Version="0.41.3" />
4341
<PackageReference Include="McMaster.Extensions.CommandLineUtils" Version="4.1.1" />
4442
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="9.0.7" />
4543
<PackageReference Include="Microsoft.Playwright" Version="1.54.0" />

0 commit comments

Comments
 (0)