File tree Expand file tree Collapse file tree 11 files changed +669
-0
lines changed
Expand file tree Collapse file tree 11 files changed +669
-0
lines changed Original file line number Diff line number Diff line change 55* .ear
66/CSharp /IWAAuthWebServer /bin /Debug
77/CSharp /IWAAuthWebServer /obj /Debug
8+ /CSharp /DynamicWebScraping /obj
9+ /CSharp /DynamicWebScraping /bin
10+ /CSharp /DynamicWebScraping /.vs
11+ /CSharp /WebScraping /bin
12+ /CSharp /WebScraping /obj
13+ /CSharp /WebScraping /.vscode
14+ /CSharp /WebScraping /.vs
Original file line number Diff line number Diff line change 1+ <Project Sdk =" Microsoft.NET.Sdk" >
2+
3+ <PropertyGroup >
4+ <OutputType >Exe</OutputType >
5+ <TargetFramework >netcoreapp3.1</TargetFramework >
6+ </PropertyGroup >
7+
8+ <ItemGroup >
9+ <PackageReference Include =" DotNetSeleniumExtras.WaitHelpers" Version =" 3.11.0" />
10+ <PackageReference Include =" Selenium.WebDriver" Version =" 3.141.0" />
11+ <PackageReference Include =" Selenium.WebDriver.ChromeDriver" Version =" 85.0.4183.8700" />
12+ </ItemGroup >
13+
14+ <ItemGroup >
15+ <None Update =" page.html" >
16+ <CopyToOutputDirectory >Always</CopyToOutputDirectory >
17+ </None >
18+ </ItemGroup >
19+
20+ </Project >
Original file line number Diff line number Diff line change 1+ using OpenQA . Selenium ;
2+ using OpenQA . Selenium . Chrome ;
3+ using OpenQA . Selenium . Support . UI ;
4+ using SeleniumExtras . WaitHelpers ;
5+ using System ;
6+ using System . IO ;
7+ using System . Reflection ;
8+
9+ namespace DynamicWebScraping
10+ {
/// <summary>
/// Console sample that drives Chrome through Selenium to read content that a
/// local HTML page adds to itself with JavaScript after a click.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the scrape and then waits for Enter so the console output stays visible.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Scrape();
        Console.ReadLine();
    }

    /// <summary>
    /// Opens page.html from the application's base directory in Chrome, clicks the
    /// element with id "heading1", waits up to ten seconds for an element with id
    /// "heading2" to exist, and writes that element's text content to the console.
    /// </summary>
    public static void Scrape()
    {
        ChromeOptions options = new ChromeOptions();
        using (IWebDriver driver = new ChromeDriver(options))
        {
            WebDriverWait wait = new WebDriverWait(driver, TimeSpan.FromSeconds(10));

            // Let Uri produce the file URL instead of concatenating "file://" with a raw
            // path: it emits the correct "file:///" form, normalises directory separators
            // and percent-encodes characters such as spaces.
            string pagePath = Path.Combine(AppContext.BaseDirectory, "page.html");
            string pageUrl = new Uri(pagePath).AbsoluteUri;
            driver.Navigate().GoToUrl(pageUrl);

            driver.FindElement(By.Id("heading1")).Click();

            // The second heading is created by the click handler, so wait for it to appear.
            IWebElement firstResult = wait.Until(ExpectedConditions.ElementExists(By.Id("heading2")));
            Console.WriteLine(firstResult.GetAttribute("textContent"));
        }
    }
}
32+ }
Original file line number Diff line number Diff line change 1+ # Web Scraping
2+
Follow my tutorial (link TBD) :-)
4+
5+ ## Instructions to Run the Project
6+
1. Navigate in the console to the project directory.
1. Execute `dotnet restore`.
1. Execute `dotnet run`.
1. Enjoy life.
Original file line number Diff line number Diff line change 1+ <!DOCTYPE html>
2+
3+ < html lang ="en " xmlns ="http://www.w3.org/1999/xhtml ">
4+ < head >
5+ < meta charset ="utf-8 " />
6+ < title > Sample</ title >
7+ < script >
// Click handler: appends an <h2 id="heading2"> containing the text "World"
// to the element whose id is "body".
function addMoreContent() {
    var newElement = document.createElement("h2");
    newElement.id = "heading2";
    newElement.appendChild(document.createTextNode("World"));
    document.getElementById("body").appendChild(newElement);
}
15+ </ script >
16+ </ head >
17+ < body id ="body ">
18+ < h1 id ="heading1 " onclick ="addMoreContent() " style ="cursor:pointer "> Hello</ h1 >
19+ </ body >
20+ </ html >
Original file line number Diff line number Diff line change 1+ using HtmlAgilityPack ;
2+ using System ;
3+ using System . Linq ;
4+
5+ namespace Vainolo . WebScraping
6+ {
/// <summary>
/// Scrapes headlines from WebScraping.html by walking the HTML tree by hand.
/// </summary>
class Method1
{
    /// <summary>
    /// Loads WebScraping.html, finds the element with id "Techniques", and then visits
    /// nodes in document order starting at the next sibling of that element's parent.
    /// The inner text of every visited node carrying the CSS class "mw-headline" is
    /// written to the console. The walk stops at the first node named "h2", or at the
    /// end of the document if there is none.
    /// </summary>
    public static void Scrape()
    {
        var page = new HtmlDocument();
        page.Load("WebScraping.html");

        // Null-conditional access: if the anchor element is missing (or nothing follows
        // its parent) the loop below simply does not run, instead of throwing.
        var techniquesTitle = page.GetElementbyId("Techniques");
        var currNode = techniquesTitle?.ParentNode?.NextSibling;

        while (currNode != null && currNode.Name != "h2")
        {
            if (currNode.GetClasses().Contains("mw-headline"))
            {
                Console.WriteLine(currNode.InnerText);
            }

            currNode = NextInDocumentOrder(currNode);
        }
    }

    /// <summary>
    /// Returns the node that follows <paramref name="node"/> in a depth-first,
    /// document-order traversal, or null when the end of the tree has been reached.
    /// </summary>
    private static HtmlNode NextInDocumentOrder(HtmlNode node)
    {
        // Descend first.
        if (node.HasChildNodes)
        {
            return node.FirstChild;
        }

        // Otherwise climb until some ancestor (or the node itself) has a following sibling.
        var candidate = node;
        while (candidate != null && candidate.NextSibling == null)
        {
            candidate = candidate.ParentNode;
        }

        return candidate?.NextSibling;
    }
}
41+ }
Original file line number Diff line number Diff line change 1+ using HtmlAgilityPack ;
2+ using System ;
3+ using System . Linq ;
4+
5+ namespace Vainolo . WebScraping
6+ {
/// <summary>
/// Scrapes headlines from WebScraping.html with a single LINQ pipeline.
/// </summary>
class Method2
{
    /// <summary>
    /// Loads WebScraping.html and enumerates the document's descendants in order:
    /// everything up to and including the node with id "Techniques" is skipped, and
    /// enumeration stops at the first node named "h2". The inner text of each node in
    /// that window that has the CSS class "mw-headline" is written to the console.
    /// </summary>
    public static void Scrape()
    {
        var document = new HtmlDocument();
        document.Load("WebScraping.html");

        var headlineTexts = document.DocumentNode
            .Descendants()
            .SkipWhile(node => node.Id != "Techniques")
            .Skip(1)
            .TakeWhile(node => node.Name != "h2")
            .Where(node => node.GetClasses().Contains("mw-headline"))
            .Select(node => node.InnerText);

        foreach (var text in headlineTexts)
        {
            Console.WriteLine(text);
        }
    }
}
25+ }
Original file line number Diff line number Diff line change 1+ using System ;
2+ using HtmlAgilityPack ;
3+ using System . Linq ;
4+
5+ namespace Vainolo . WebScraping
6+ {
/// <summary>
/// Entry point that runs both scraping approaches one after the other.
/// </summary>
class Program
{
    /// <summary>
    /// Prints a banner and runs <c>Method1.Scrape</c>, prints a blank line, prints a
    /// banner and runs <c>Method2.Scrape</c>, then waits for Enter before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        PrintBanner("Scraping using method 1");
        Method1.Scrape();

        Console.WriteLine("");

        PrintBanner("Scraping using method 2");
        Method2.Scrape();

        Console.ReadLine();
    }

    /// <summary>Writes a section title followed by a dashed underline.</summary>
    private static void PrintBanner(string title)
    {
        Console.WriteLine(title);
        Console.WriteLine("-----------------------");
    }
}
21+ }
Original file line number Diff line number Diff line change 1+ # Web Scraping
2+
Follow my tutorial [here](https://www.vainolo.com/2020/05/06/scraping-web-pages-with-c-and-htmlagilitypack/) :-)
4+
5+ ## Instructions to Run the Project
6+
1. Navigate in the console to the project directory.
1. Execute `dotnet restore`.
1. Execute `dotnet run`.
1. Enjoy life.
Original file line number Diff line number Diff line change 1+ <Project Sdk =" Microsoft.NET.Sdk" >
2+
3+ <PropertyGroup >
4+ <OutputType >Exe</OutputType >
5+ <TargetFramework >netcoreapp3.1</TargetFramework >
6+ </PropertyGroup >
7+
8+ <ItemGroup >
9+ <PackageReference Include =" HtmlAgilityPack" Version =" 1.11.23" />
10+ </ItemGroup >
11+
12+ <ItemGroup >
13+ <None Update =" WebScraping.html" >
14+ <CopyToOutputDirectory >Always</CopyToOutputDirectory >
15+ </None >
16+ </ItemGroup >
17+
18+ </Project >
You can’t perform that action at this time.
0 commit comments