Skip to content

Commit 0cf80bf

Browse files
author
spenney
committed
Whatever You Want There Too
1 parent 330adca commit 0cf80bf

File tree

1 file changed

+33
-2
lines changed

1 file changed

+33
-2
lines changed

WebCrawler/Model/Crawler.cs

Lines changed: 33 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,9 +79,9 @@ public void InitializeCrawl(string key, string url)
7979
{
8080
//var crawlUrl = ConfigurationManager.AppSettings["url"];
8181
Configuration configuration = ConfigurationManager.OpenExeConfiguration(ConfigurationUserLevel.None);
82+
8283
configuration.AppSettings.Settings["url"].Value = url;
8384
configuration.Save();
84-
8585
ConfigurationManager.RefreshSection("appSettings");
8686
CrawlPage(ConfigurationManager.AppSettings["url"]);
8787
}
@@ -130,13 +130,19 @@ private void CrawlPage(string url)
130130
{
131131
links.Add(new Link("Page Title", urlToParse));
132132
log.Add(new Log($"New Entry: {url}", DateTime.Now));
133+
134+
}
135+
if (linkParser.GoodUrls.Count == 0)
136+
{
137+
links.Add(new Link("Page Title", "==============No New URLs From Page=============="));
133138
}
134139

135140
foreach (string exception in linkParser.Exceptions)
136141
_exceptions.Add(exception);
137142

138143
isCurrentPage = false;
139-
144+
Console.WriteLine(linkParser.GoodUrls);
145+
Console.WriteLine(_externalUrlRepository.List);
140146
//Crawl all the links found on the page.
141147
foreach (string link in _externalUrlRepository.List)
142148
{
@@ -147,6 +153,7 @@ private void CrawlPage(string url)
147153

148154
if (formattedLink != String.Empty)
149155
{
156+
links.Add(new Link(" ", "==============Crawling to new external page...=============="));
150157
CrawlPage(formattedLink);
151158
}
152159
}
@@ -155,6 +162,30 @@ private void CrawlPage(string url)
155162
_failedUrlRepository.List.Add(formattedLink + " (on page at url " + url + ") - " + exc.Message);
156163
}
157164
}
165+
int loopBreak = 0;
166+
foreach (string link in linkParser.GoodUrls)
167+
{
168+
string formattedLink = link;
169+
loopBreak++;
170+
try
171+
{
172+
formattedLink = FixPath(url, formattedLink);
173+
174+
if (formattedLink != String.Empty)
175+
{
176+
links.Add(new Link(" ", "==============Crawling to new internal page...=============="));
177+
CrawlPage(formattedLink);
178+
}
179+
}
180+
catch (Exception exc)
181+
{
182+
_failedUrlRepository.List.Add(formattedLink + " (on page at url " + url + ") - " + exc.Message);
183+
}
184+
if (loopBreak >= 100)
185+
{
186+
break;
187+
}
188+
}
158189
}
159190
}
160191

0 commit comments

Comments
 (0)