Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions RuiJi.Net.Node/Feed/Db/FeedModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -134,18 +134,28 @@ private static List<WebHeader> GetHeaders(string headers)
while (!string.IsNullOrEmpty(line))
{
var sp = line.Split(':');

if (sp.Length < 2)
{
continue;
/*
* 这里原来的 continue 会导致死循环 endless loop:
* Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36
*/
goto next;
}

result.Add(new WebHeader(line.Substring(0, line.IndexOf(':')), line.Substring(line.IndexOf(':') + 1)));
var endIndex = line.IndexOf(':');
result.Add(new WebHeader(line.Substring(0, endIndex),
line.Substring(endIndex + 1)));

if (reader.EndOfStream)
break;

next:
line = reader.ReadLine();
}
}

return result;
}
}
Expand Down
19 changes: 16 additions & 3 deletions RuiJi.Net.NodeVisitor/Setter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Configuration;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -39,14 +40,26 @@ public static string GetRandomSettingUA()
string response = "";
var resetEvent = new ManualResetEvent(false);

var handle = client.ExecuteAsync(restRequest, (restResponse) => {
response = restResponse.Content;
// 这里服务端返回了 500,导致返回的 UA 是一串 HTML
var handle = client.ExecuteAsync(restRequest, (restResponse) =>
{
if (restResponse.StatusCode == HttpStatusCode.OK)
{
response = restResponse.Content;
}
else
{
// 如果服务端返回了失败的结果
response =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36";
}
resetEvent.Set();
});

// 可以全部改成 Task 异步
resetEvent.WaitOne();

return response;
}
}
}
}
13 changes: 11 additions & 2 deletions RuiJi.Net.Owin/Controllers/TestController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;

namespace RuiJi.Net.Owin.Controllers
{
Expand All @@ -37,18 +38,26 @@ public object TestRule([FromBody]RuleModel rule, bool debug = false)
var response = Crawler.Request(request);
if (response != null && response.Data != null)
{
if (response.StatusCode!= HttpStatusCode.OK)
{
// TODO:
}
var content = response.Data.ToString();
var block = RuiJiBlockParser.ParserBlock(rule.RuiJiExpression);
var r = new ExtractRequest();
r.Content = content;
if (string.IsNullOrWhiteSpace(rule.RuiJiExpression))
{
return r;
}
var block = RuiJiBlockParser.ParserBlock(rule.RuiJiExpression);

r.Blocks = new List<ExtractFeatureBlock> {
new ExtractFeatureBlock (block,rule.Feature)
};

var results = Extractor.Extract(r);

var result = results.OrderByDescending(m => m.Metas.Count).FirstOrDefault();
var result = results?.OrderByDescending(m => m.Metas?.Count??0)?.FirstOrDefault();

if (result != null && result.Paging != null && result.Paging.Count > 0 && result.Metas != null && result.Metas.ContainsKey("content"))
{
Expand Down
12 changes: 6 additions & 6 deletions RuiJi.Net.Owin/ruiji.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
"docServer": "localhost:80"
},
"storage": {
"ua": "LiteDb/UAs.db",
"rule": "LiteDb/Rules.db",
"proxy": "LiteDb/Proxys.db",
"func": "LiteDb/Funcs.db",
"feed": "LiteDb/Feeds.db",
"content": "LiteDb/Content/{shard}.db"
"ua": "LiteDb/UAs.db;Mode=Exclusive",
"rule": "LiteDb/Rules.db;Mode=Exclusive",
"proxy": "LiteDb/Proxys.db;Mode=Exclusive",
"func": "LiteDb/Funcs.db;Mode=Exclusive",
"feed": "LiteDb/Feeds.db;Mode=Exclusive",
"content": "LiteDb/Content/{shard}.db;Mode=Exclusive"
}
Copy link
Author

@RockNHawk RockNHawk Sep 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里的连接字符串少了 Filename 参数,会报错,应该这样:

"storage": {
    "ua": "Filename=LiteDb/UAs.db;Mode=Exclusive",
    "rule": "Filename=LiteDb/Rules.db;Mode=Exclusive",
    "proxy": "Filename=LiteDb/Proxys.db;Mode=Exclusive",
    "func": "Filename=LiteDb/Funcs.db;Mode=Exclusive",
    "feed": "Filename=LiteDb/Feeds.db;Mode=Exclusive",
    "content": "Filename=LiteDb/Content/{shard}.db;Mode=Exclusive"
}

//,
//"nodes": [
Expand Down