Skip to content

Commit 5d24cc8

Browse files
Merge pull request #1 from ThePoShWolf/master
Added ConvertFrom-HtmlTable
2 parents 908fa87 + c76bc56 commit 5d24cc8

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Download the Wikipedia article on PowerShell and keep only its HTML body.
$html = Invoke-WebRequest -Uri 'https://en.wikipedia.org/wiki/PowerShell' |
    Select-Object -ExpandProperty Content

# Convert the HTML tables found in the page into PowerShell objects.
$tmp = ConvertFrom-HtmlTable -Content $html

# Objects from different tables carry different properties, so printing
# all of them together renders poorly. Format the tables one at a time.
0..2 | ForEach-Object { $tmp[$_] | Format-Table -AutoSize }

# ... etc

Public/ConvertFrom-HtmlTable.ps1

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
Function ConvertFrom-HtmlTable {
    <#
    .SYNOPSIS
    Converts the <table> elements of an HTML string into PowerShell objects.

    .DESCRIPTION
    Parses the supplied HTML with AngleSharp and walks every <table> element.
    The trimmed, non-empty text of the cells in a table's first row is used as
    the property names; every following row becomes one [PSCustomObject] whose
    values are the trimmed text of that row's cells.

    Each converted table is written to the pipeline as a single array, so a
    document with several tables yields an array of arrays.

    A table is skipped when it has no rows, when its first row yields no
    header text, when it has no rows after the header row, or when any data
    row has a different number of cells than there are headers.

    .PARAMETER Content
    The HTML markup to parse.

    .OUTPUTS
    System.Object[]. One array of PSCustomObject per converted table.

    .EXAMPLE
    $tables = ConvertFrom-HtmlTable -Content $html
    $tables[0] | Format-Table -AutoSize
    #>
    [cmdletbinding()]
    param (
        [Parameter(
            Mandatory = $true
        )]
        [string]$Content
    )
    Begin {
        # Initialize the parser once; it is reused for every Process call.
        $HTMLParser = [AngleSharp.Html.Parser.HtmlParser]::new()
    }
    Process {
        # Load the html
        $ParsedDocument = $HTMLParser.ParseDocument($Content)

        # Get all the tables
        [Array] $Tables = $ParsedDocument.GetElementsByTagName('table')

        foreach ($table in $Tables) {

            # The first row supplies the property names. A table without any
            # rows cannot be converted.
            $headerRow = $table.Rows | Select-Object -First 1
            if ($null -eq $headerRow) {
                Write-Verbose 'Table has no rows'
                continue
            }

            # Collect the non-empty header texts. Wrapping in @() keeps this
            # an array even for a single header, so $headers[$x] below indexes
            # headers rather than the characters of one string.
            $headers = @(
                foreach ($cell in $headerRow.Cells) {
                    $text = $cell.TextContent.Trim()
                    if ($text) { $text }
                }
            )
            if ($headers.Count -eq 0) {
                Write-Verbose 'Table has no headers'
                continue
            }

            $supported = $true
            $output = [System.Collections.Generic.List[object]]::new()

            # Skip the header row itself; only the remaining rows are data.
            foreach ($row in $table.Rows | Select-Object -Skip 1) {
                $cells = @($row.Cells)

                # If there aren't as many cells as headers, skip this table
                if ($cells.Count -ne $headers.Count) {
                    Write-Verbose 'Unsupported table.'
                    $supported = $false
                    break
                }

                # Build the object in header order, one property per column.
                $obj = [ordered]@{ }
                for ($x = 0; $x -lt $headers.Count; $x++) {
                    $obj[$headers[$x]] = $cells[$x].TextContent.Trim()
                }
                $output.Add([PSCustomObject] $obj)
            }

            if (-not $supported) {
                continue
            }

            # if there are any rows, output
            if ($output.Count -ge 1) {
                # The unary comma stops the pipeline from unrolling the array,
                # so each table arrives downstream as one object.
                , ($output.ToArray())
            } else {
                Write-Verbose 'Table has no rows'
            }
        }
    }
    End { }
}

0 commit comments

Comments
 (0)