Skip to content

Commit 0f3c94c

Browse files
author
邹嵩
committed
默认Pipeline设为Console
修复WebDriverDownloader的一个错误 支持设置Take的方向
1 parent b13ecb7 commit 0f3c94c

File tree

14 files changed

+72
-39
lines changed

14 files changed

+72
-39
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,3 +202,4 @@ FakesAssemblies/
202202
*.opt
203203

204204
/src/DotnetSpider.Test/Example/TestExample.cs
205+
/src/DotnetSpider.Sample/Class2.cs

nuget/DotnetSpider.Core.nuspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<package xmlns="http://schemas.microsoft.com/packaging/2012/06/nuspec.xsd">
33
<metadata>
44
<id>DotnetSpider2.Core</id>
5-
<version>2.3.8</version>
5+
<version>2.3.9-beta2</version>
66
<authors>zlzforever@163.com;Walterwhatwater;xiaohuan0204</authors>
77
<owners>zlzforever@163.com</owners>
88
<iconUrl>https://github.com/zlzforever/DotnetSpider/blob/master/images/icon.png?raw=true</iconUrl>

nuget/DotnetSpider.Extension.nuspec

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<package xmlns="http://schemas.microsoft.com/packaging/2012/06/nuspec.xsd">
33
<metadata>
44
<id>DotnetSpider2.Extension</id>
5-
<version>2.3.8</version>
5+
<version>2.3.9-beta2</version>
66
<authors>zlzforever@163.com;Walterwhatwater;xiaohuan0204</authors>
77
<owners>zlzforever@163.com</owners>
88
<iconUrl>https://github.com/zlzforever/DotnetSpider/blob/master/images/icon.png?raw=true</iconUrl>
@@ -13,7 +13,7 @@
1313
<description>A .NET Standard web crawling library similar to WebMagic and Scrapy. It is a lightweight ,efficient and fast high-level web crawling &amp; scraping framework for .NET</description>
1414
<dependencies>
1515
<group targetFramework=".NETStandard2.0">
16-
<dependency id="DotnetSpider2.Core" version="2.3.8" />
16+
<dependency id="DotnetSpider2.Core" version="2.3.9-beta2" />
1717
<dependency id="Dapper" version="1.50.2"/>
1818
<dependency id="MailKit" version="1.20.0"/>
1919
<dependency id="MongoDB.Driver" version="2.4.4"/>
@@ -28,7 +28,7 @@
2828
<dependency id="MessagePack" version="1.7.2"/>
2929
</group>
3030
<group targetFramework=".NETFramework4.5" >
31-
<dependency id="DotnetSpider2.Core" version="2.3.8" />
31+
<dependency id="DotnetSpider2.Core" version="2.3.9-beta2" />
3232
<dependency id="Dapper" version="1.50.2"/>
3333
<dependency id="MailKit" version="1.20.0"/>
3434
<dependency id="MongoDB.Driver" version="2.4.4"/>

src/DotnetSpider.Core/DefaultSpider.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public class DefaultSpider : Spider
2222
/// </summary>
2323
/// <param name="id">爬虫标识</param>
2424
/// <param name="site">网站信息</param>
25-
public DefaultSpider(string id, Site site) : base(site, id, new QueueDuplicateRemovedScheduler(), new[] { new SimplePageProcessor() }, new[] { new NullPipeline() })
25+
public DefaultSpider(string id, Site site) : base(site, id, new QueueDuplicateRemovedScheduler(), new[] { new SimplePageProcessor() }, new[] { new ConsolePipeline() })
2626
{
2727
}
2828

@@ -32,7 +32,7 @@ public class DefaultSpider : Spider
3232
/// <param name="id">爬虫标识</param>
3333
/// <param name="site">网站信息</param>
3434
/// <param name="scheduler">URL队列</param>
35-
public DefaultSpider(string id, Site site, IScheduler scheduler) : base(site, id, scheduler, new[] { new SimplePageProcessor() }, new[] { new NullPipeline() })
35+
public DefaultSpider(string id, Site site, IScheduler scheduler) : base(site, id, scheduler, new[] { new SimplePageProcessor() }, new[] { new ConsolePipeline() })
3636
{
3737
}
3838
}

src/DotnetSpider.Core/Pipeline/NullPipeline.cs

Lines changed: 0 additions & 21 deletions
This file was deleted.

src/DotnetSpider.Core/Selector/BaseSelectable.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,5 @@ public List<string> GetValues(bool isPlainText)
161161
/// <param name="selector">查询器</param>
162162
/// <returns>查询接口</returns>
163163
public abstract ISelectable SelectList(ISelector selector);
164-
165164
}
166165
}

src/DotnetSpider.Core/Spider.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -978,7 +978,7 @@ protected virtual void InitComponent(params string[] arguments)
978978
/// <returns>数据管道</returns>
979979
protected virtual IPipeline GetDefaultPipeline()
980980
{
981-
return new NullPipeline();
981+
return new ConsolePipeline();
982982
}
983983

984984
/// <summary>

src/DotnetSpider.Extension/CommonSpider.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ protected override void RunApp(params string[] arguments)
8282

8383
if (Pipelines == null || Pipelines.Count == 0)
8484
{
85-
AddPipeline(new NullPipeline());
85+
AddPipeline(new ConsolePipeline());
8686
}
8787
if (PageProcessors == null || PageProcessors.Count == 0)
8888
{

src/DotnetSpider.Extension/Downloader/WebDriverDownloader.cs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public class WebDriverDownloader : BaseDownloader
2626
private readonly Browser _browser;
2727
private readonly Option _option;
2828
private bool _isDisposed;
29-
private readonly string[] _domains;
29+
private readonly List<string> _domains = new List<string>();
3030
/// <summary>
3131
/// 每次navigate完成后, WebDriver 需要执行的操作
3232
/// </summary>
@@ -44,7 +44,16 @@ public WebDriverDownloader(Browser browser, string[] domains = null, int webDriv
4444
_webDriverWaitTime = webDriverWaitTime;
4545
_browser = browser;
4646
_option = option ?? new Option();
47-
_domains = domains;
47+
if (domains != null)
48+
{
49+
foreach (var domain in domains)
50+
{
51+
if (!string.IsNullOrWhiteSpace(domain) && !_domains.Contains(domain))
52+
{
53+
_domains.Add(domain);
54+
}
55+
}
56+
}
4857

4958
if (browser == Browser.Firefox && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
5059
{
@@ -96,12 +105,15 @@ protected override Page DowloadContent(Request request, ISpider spider)
96105
{
97106
_webDriver = WebDriverUtil.Open(_browser, _option);
98107

99-
foreach (var domain in _domains)
108+
if (_domains != null)
100109
{
101-
var cookies = CookieContainer.GetCookies(new Uri(domain));
102-
foreach (System.Net.Cookie cookie in cookies)
110+
foreach (var domain in _domains)
103111
{
104-
AddCookieToDownloadClient(cookie);
112+
var cookies = CookieContainer.GetCookies(new Uri(domain));
113+
foreach (System.Net.Cookie cookie in cookies)
114+
{
115+
AddCookieToDownloadClient(cookie);
116+
}
105117
}
106118
}
107119

@@ -177,6 +189,10 @@ protected override Page DowloadContent(Request request, ISpider spider)
177189
[MethodImpl(MethodImplOptions.Synchronized)]
178190
protected override void AddCookieToDownloadClient(System.Net.Cookie cookie)
179191
{
192+
if (!_domains.Contains(cookie.Domain))
193+
{
194+
_domains.Add(cookie.Domain);
195+
}
180196
_webDriver?.Manage().Cookies.AddCookie(new Cookie(cookie.Name, cookie.Value, cookie.Domain, cookie.Path, null));
181197
}
182198
}

src/DotnetSpider.Extension/Model/Attribute/EntitySelector.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,10 @@ public class EntitySelector : SelectorAttribute
1212
/// 从最终解析到的结果中取前 Take 个实体
1313
/// </summary>
1414
public int Take { get; set; } = -1;
15+
16+
/// <summary>
17+
/// 设置 Take 的方向, 默认是从头部取
18+
/// </summary>
19+
public bool TakeFromHead { get; set; } = true;
1520
}
1621
}

0 commit comments

Comments
 (0)