
Commit 24b75ad

Author: 邹嵩 (committed)
Add test cases
Update version
1 parent a3ed709 commit 24b75ad

37 files changed: +378 -319 lines changed

nuget/DotnetSpider.Core.nuspec

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 <package xmlns="http://schemas.microsoft.com/packaging/2012/06/nuspec.xsd">
   <metadata>
     <id>DotnetSpider2.Core</id>
-    <version>2.3.7-beta4</version>
+    <version>2.3.7-beta5</version>
     <authors>zlzforever@163.com;Walterwhatwater;xiaohuan0204</authors>
     <owners>zlzforever@163.com</owners>
     <iconUrl>https://github.com/zlzforever/DotnetSpider/blob/master/images/icon.png?raw=true</iconUrl>

nuget/DotnetSpider.Extension.nuspec

Lines changed: 3 additions & 3 deletions
@@ -2,7 +2,7 @@
 <package xmlns="http://schemas.microsoft.com/packaging/2012/06/nuspec.xsd">
   <metadata>
     <id>DotnetSpider2.Extension</id>
-    <version>2.3.7-beta4</version>
+    <version>2.3.7-beta5</version>
     <authors>zlzforever@163.com;Walterwhatwater;xiaohuan0204</authors>
     <owners>zlzforever@163.com</owners>
     <iconUrl>https://github.com/zlzforever/DotnetSpider/blob/master/images/icon.png?raw=true</iconUrl>
@@ -13,7 +13,7 @@
     <description>A .NET Standard web crawling library similar to WebMagic and Scrapy. It is a lightweight ,efficient and fast high-level web crawling &amp; scraping framework for .NET</description>
     <dependencies>
       <group targetFramework=".NETStandard2.0">
-        <dependency id="DotnetSpider2.Core" version="2.3.7-beta4" />
+        <dependency id="DotnetSpider2.Core" version="2.3.7-beta5" />
         <dependency id="Dapper" version="1.50.2"/>
         <dependency id="MailKit" version="1.20.0"/>
         <dependency id="MongoDB.Driver" version="2.4.4"/>
@@ -28,7 +28,7 @@
         <dependency id="MessagePack" version="1.7.2"/>
       </group>
       <group targetFramework=".NETFramework4.5" >
-        <dependency id="DotnetSpider2.Core" version="2.3.7-beta4" />
+        <dependency id="DotnetSpider2.Core" version="2.3.7-beta5" />
         <dependency id="Dapper" version="1.50.2"/>
         <dependency id="MailKit" version="1.20.0"/>
         <dependency id="MongoDB.Driver" version="2.4.4"/>

src/DotnetSpider.Core.Test/DotnetSpider.Core.Test.csproj

Lines changed: 6 additions & 0 deletions
@@ -23,6 +23,9 @@
     <Service Include="{82a7f48d-3b50-4b1e-b82e-3ada8210c358}" />
   </ItemGroup>
   <ItemGroup>
+    <None Update="Downloader\1.html">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
     <None Update="app.outside.config">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
@@ -35,6 +38,9 @@
     <None Update="app.config">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
+    <None Update="Downloader\test.cookies">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
     <None Update="Jd.html">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>

src/DotnetSpider.Core.Test/Downloader/1.html

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+hello

src/DotnetSpider.Core.Test/Downloader/CookieInjectorTest.cs

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+using DotnetSpider.Core.Downloader;
+using System;
+using System.Collections.Generic;
+using System.Text;
+using Xunit;
+
+namespace DotnetSpider.Core.Test.Downloader
+{
+	public class CookieInjectorTest
+	{
+		[Fact]
+		public void FileCookieInject()
+		{
+			FileCookieInject cookieInject = new FileCookieInject("Downloader\\test.cookies");
+			var spider = new DefaultSpider();
+			cookieInject.Inject(spider.Downloader, spider);
+			var cookies = spider.Downloader.GetCookies(new Uri("http://baidu.com"));
+			Assert.Equal("b", cookies["a"].Value);
+			Assert.Equal("e", cookies["c"].Value);
+		}
+
+		[Fact]
+		public void FileCookieInject_FileNotExists()
+		{
+			Assert.Throws<ArgumentException>(() =>
+			{
+				FileCookieInject cookieInject = new FileCookieInject("notexists.cookies");
+			});
+		}
+	}
+}

src/DotnetSpider.Core.Test/Downloader/FileDownloaderTest.cs

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+using DotnetSpider.Core.Downloader;
+using System.IO;
+using Xunit;
+
+namespace DotnetSpider.Core.Test.Downloader
+{
+	public class FileDownloaderTest
+	{
+		[Fact]
+		public void DownloadRelativePathFile()
+		{
+			FileDownloader downloader = new FileDownloader();
+			var request = new Request("file://Downloader/1.html");
+			var spider = new DefaultSpider();
+			var page = downloader.Download(request, spider);
+			Assert.Equal("hello", page.Content);
+		}
+
+		[Fact]
+		public void DownloadRelativeAbsolutePathFile()
+		{
+			FileDownloader downloader = new FileDownloader();
+			var path = Path.Combine(Env.BaseDirectory, "Downloader\\1.html");
+			var request = new Request($"file://{path}");
+			var spider = new DefaultSpider();
+			var page = downloader.Download(request, spider);
+			Assert.Equal("hello", page.Content);
+		}
+
+		[Fact]
+		public void FileNotExists()
+		{
+			FileDownloader downloader = new FileDownloader();
+			var request = new Request("file://Downloader/2.html");
+			var spider = new DefaultSpider();
+			var page = downloader.Download(request, spider);
+			Assert.True(string.IsNullOrEmpty(page.Content));
+			Assert.Equal("File downloader\\2.html unfound.", page.Exception.Message);
+			Assert.True(page.Skip);
+		}
+	}
+}
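
Note: the three tests above imply how FileDownloader resolves file:// requests: a relative path is looked up under the application base directory (Env.BaseDirectory), an absolute path is used as-is, and a missing file marks the page as skipped with an "unfound" exception. The resolution step below is only a sketch drawn from those assertions, not the library's actual implementation; FilePathSketch and Resolve are hypothetical names.

using System.IO;

// Hypothetical sketch, not part of this commit: path resolution implied by FileDownloaderTest.
internal static class FilePathSketch
{
	public static string Resolve(string fileUri, string baseDirectory)
	{
		// "file://Downloader/1.html" -> "Downloader/1.html"
		var local = fileUri.Substring("file://".Length);

		// Relative paths resolve against the base directory; absolute paths
		// (as built with Path.Combine(Env.BaseDirectory, ...)) pass through unchanged.
		return Path.IsPathRooted(local) ? local : Path.Combine(baseDirectory, local);
	}
}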

src/DotnetSpider.Core.Test/Downloader/HttpClientDownloaderTest.cs

Lines changed: 3 additions & 34 deletions
@@ -7,41 +7,10 @@
 
 namespace DotnetSpider.Core.Test.Downloader
 {
-
 	public class HttpClientDownloaderTest
 	{
-		//[Fact]
-		//public void Timeout()
-		//{
-		//	HttpClientDownloader downloader = new HttpClientDownloader();
-		//	DefaultSpider spider = new DefaultSpider("abcd", new Site { Timeout = 5000 });
-		//	downloader.Download(new Request("http://www.163.com", null), spider);
-		//	try
-		//	{
-		//		downloader.Download(new Request("http://localhost/abcasdfasdfasdfas", null), spider);
-		//		throw new Exception("Test Failed");
-		//	}
-		//	catch (SpiderException e)
-		//	{
-		//		Assert.IsNotNull(e);
-		//	}
-		//	Stopwatch watch = new Stopwatch();
-		//	watch.Start();
-		//	try
-		//	{
-		//		downloader.Download(new Request("http://google.com/", null), spider);
-		//	}
-		//	catch (SpiderException e)
-		//	{
-		//		Assert.IsNotNull(e);
-		//	}
-		//	watch.Stop();
-		//	Assert.True(watch.ElapsedMilliseconds > 5000);
-		//	Assert.True(watch.ElapsedMilliseconds < 6000);
-		//}
-
 		/// <summary>
-		/// Run this test manually; after it finishes, use netstat -ano to check port usage. Only one port should be in use. If
+		/// Run this test manually; after it finishes, use netstat -ano to check port usage. Only one port should be in use.
 		/// </summary>
 		[Fact(Skip = "Need person double check")]
 		public void Ports()
@@ -132,8 +101,8 @@ public void GetTargetUrlWhenRedirect()
 			};
 			var downloader = new HttpClientDownloader();
 			var page = downloader.Download(new Request("http://item.jd.com/1231222221111123.html", null), new DefaultSpider("test", site));
-
-			Assert.True(page.TargetUrl.Contains("www.jd.com/2017?t=") || page.TargetUrl.Contains("global.jd.com"));
+			Assert.DoesNotContain("1231222221111123", page.TargetUrl);
+			Assert.True(page.TargetUrl.Contains("www.jd.com/") || page.TargetUrl.Contains("global.jd.com"));
 		}
 	}
 }

src/DotnetSpider.Core.Test/Downloader/test.cookies

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+baidu.com
+/
+a=b;c=e
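
The test.cookies fixture above pairs with CookieInjectorTest: the first line appears to hold the cookie domain, the second the path, and the third semicolon-separated name=value pairs. How FileCookieInject actually parses the file is not shown in this commit, so the helper below (CookieFileSketch.Load is a hypothetical name) only sketches that assumed format with System.Net types.

using System;
using System.IO;
using System.Net;

// Hypothetical sketch, not part of this commit: parse a three-line cookie file of the form
//   line 1: domain, line 2: path, line 3: "name=value" pairs separated by ';'
internal static class CookieFileSketch
{
	public static CookieContainer Load(string file)
	{
		var lines = File.ReadAllLines(file);
		if (lines.Length < 3)
		{
			throw new ArgumentException("Cookie file needs domain, path and cookie pairs.");
		}

		var domain = lines[0].Trim();
		var path = lines[1].Trim();
		var container = new CookieContainer();

		// Third line "a=b;c=e" becomes cookies a=b and c=e for the given domain/path.
		foreach (var pair in lines[2].Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
		{
			var kv = pair.Split('=');
			container.Add(new Cookie(kv[0].Trim(), kv[1].Trim(), path, domain));
		}

		return container;
	}
}

Loaded this way, GetCookies(new Uri("http://baidu.com")) yields a=b and c=e, which is exactly what the new test asserts.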

src/DotnetSpider.Core/DefaultSpider.cs

Lines changed: 3 additions & 2 deletions
@@ -1,4 +1,5 @@
 using System;
+using DotnetSpider.Core.Pipeline;
 using DotnetSpider.Core.Processor;
 using DotnetSpider.Core.Scheduler;
 
@@ -21,7 +22,7 @@ public class DefaultSpider : Spider
 	/// </summary>
 	/// <param name="id">Spider identifier</param>
 	/// <param name="site">Site information</param>
-	public DefaultSpider(string id, Site site) : base(site, id, new QueueDuplicateRemovedScheduler(), new SimplePageProcessor())
+	public DefaultSpider(string id, Site site) : base(site, id, new QueueDuplicateRemovedScheduler(), new[] { new SimplePageProcessor() }, new[] { new NullPipeline() })
 	{
 	}
 
@@ -31,7 +32,7 @@ public class DefaultSpider : Spider
 	/// <param name="id">Spider identifier</param>
 	/// <param name="site">Site information</param>
 	/// <param name="scheduler">URL queue</param>
-	public DefaultSpider(string id, Site site, IScheduler scheduler) : base(site, id, scheduler, new SimplePageProcessor())
+	public DefaultSpider(string id, Site site, IScheduler scheduler) : base(site, id, scheduler, new[] { new SimplePageProcessor() }, new[] { new NullPipeline() })
 	{
 	}
 }

src/DotnetSpider.Core/Downloader/AfterDownloadCompleteHandler.Extensions.cs

Lines changed: 4 additions & 7 deletions
@@ -46,10 +46,7 @@ public override void Handle(ref Page page, IDownloader downloader, ISpider spide
 			page.AddTargetRequest(request);
 		}
 
-		if (!_extractByProcessor)
-		{
-			page.SkipExtractTargetUrls = !page.SkipExtractTargetUrls || page.SkipExtractTargetUrls;
-		}
+		page.SkipExtractTargetUrls = !_extractByProcessor;
 	}
 }
 
@@ -71,10 +68,10 @@ public TimingUpdateCookieHandler(int interval, ICookieInjector injector)
 	{
 		if (interval <= 0)
 		{
-			throw new SpiderException("dueTime should be large than 0.");
+			throw new ArgumentException("interval should be larger than 0.");
 		}
 
-		_cookieInjector = injector ?? throw new SpiderException("CookieInjector should not be null.");
+		_cookieInjector = injector ?? throw new ArgumentNullException("CookieInjector should not be null.");
 		_next = DateTime.Now.AddSeconds(_interval);
 		_interval = interval;
 	}
@@ -89,8 +86,8 @@ public override void Handle(ref Page page, IDownloader downloader, ISpider spide
 	{
 		if (DateTime.Now > _next)
 		{
-			_cookieInjector.Inject(downloader, spider);
 			_next = DateTime.Now.AddSeconds(_interval);
+			_cookieInjector.Inject(downloader, spider);
 		}
 	}
 }
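
For orientation: the constructor now validates its arguments with ArgumentException/ArgumentNullException instead of SpiderException, and Handle advances _next before re-injecting cookies. The snippet below is only a sketch of how the handler could be combined with the FileCookieInject exercised by the new tests; how a handler is attached to a downloader is not part of this diff, so that step is omitted, and the class and method names here are illustrative only.

using DotnetSpider.Core.Downloader;

// Illustrative sketch, not part of the commit.
internal static class TimingCookieSketch
{
	public static TimingUpdateCookieHandler Build()
	{
		// Same fixture the new tests copy to the output directory.
		var injector = new FileCookieInject("Downloader\\test.cookies");

		// interval must be > 0 (ArgumentException otherwise); a null injector
		// now raises ArgumentNullException. Cookies are then re-injected at
		// most once every 300 seconds while pages are downloaded.
		return new TimingUpdateCookieHandler(300, injector);
	}
}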
