Skip to content

Commit a4c1ea1

Browse files
committed
Start migration to AngleSharp; Fix SauceNao
1 parent 36b83a9 commit a4c1ea1

File tree

11 files changed

+251
-195
lines changed

11 files changed

+251
-195
lines changed

SmartImage.Lib/Engines/Impl/Ascii2DEngine.cs

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Linq;
4+
using AngleSharp;
5+
using AngleSharp.Dom;
6+
using AngleSharp.Html.Dom;
7+
using AngleSharp.Html.Parser;
8+
using AngleSharp.XPath;
49
using HtmlAgilityPack;
510
using SimpleCore.Net;
611
using SmartImage.Lib.Searching;
@@ -24,7 +29,7 @@ public Ascii2DEngine() : base("https://ascii2d.net/search/url/") { }
2429
*
2530
*/
2631

27-
protected override HtmlDocument GetDocument(SearchResult sr)
32+
protected override IDocument GetDocument(SearchResult sr)
2833
{
2934
var url = sr.RawUri.ToString();
3035

@@ -47,25 +52,29 @@ protected override HtmlDocument GetDocument(SearchResult sr)
4752
}
4853

4954
//[DebuggerHidden]
50-
protected override SearchResult Process(HtmlDocument doc, SearchResult sr)
55+
protected override SearchResult Process(IDocument doc, SearchResult sr)
5156
{
5257

53-
var nodes = doc.DocumentNode.SelectNodes("//*[contains(@class, 'info-box')]");
58+
59+
var nodes = doc.Body.SelectNodes("//*[contains(@class, 'info-box')]");
60+
61+
62+
//var nodes = doc.DocumentNode.SelectNodes("//*[contains(@class, 'info-box')]");
5463

5564
var rg = new List<ImageResult>();
5665

5766
foreach (var node in nodes) {
5867

5968
var ir = new ImageResult();
6069

61-
var info = node.ChildNodes.Where(n => !string.IsNullOrWhiteSpace(n.InnerText)).ToArray();
70+
var info = node.ChildNodes.Where(n => !string.IsNullOrWhiteSpace(n.TextContent)).ToArray();
6271

6372

64-
var hash = info.First().InnerText;
73+
var hash = info.First().TextContent;
6574

6675
ir.OtherMetadata.Add("Hash", hash);
6776

68-
var data = info[1].InnerText.Split(' ');
77+
var data = info[1].TextContent.Split(' ');
6978

7079
var res = data[0].Split('x');
7180
ir.Width = int.Parse(res[0]);
@@ -80,24 +89,24 @@ protected override SearchResult Process(HtmlDocument doc, SearchResult sr)
8089
var desc = info.Last().FirstChild;
8190
var ns = desc.NextSibling;
8291

83-
if (node2.ChildNodes.Count >= 2 && node2.ChildNodes[1].ChildNodes.Count >= 2) {
92+
if (node2.ChildNodes.Length >= 2 && node2.ChildNodes[1].ChildNodes.Length >= 2) {
8493
var node2Sub = node2.ChildNodes[1];
8594

86-
if (node2Sub.ChildNodes.Count >= 8) {
87-
ir.Description = node2Sub.ChildNodes[3].InnerText.Trim();
88-
ir.Artist = node2Sub.ChildNodes[5].InnerText.Trim();
89-
ir.Site = node2Sub.ChildNodes[7].InnerText.Trim();
95+
if (node2Sub.ChildNodes.Length >= 8) {
96+
ir.Description = node2Sub.ChildNodes[3].TextContent.Trim();
97+
ir.Artist = node2Sub.ChildNodes[5].TextContent.Trim();
98+
ir.Site = node2Sub.ChildNodes[7].TextContent.Trim();
9099

91100
}
92101
}
93102

94103
//var childNode = ns.ChildNodes[1].ChildNodes[0];
95-
if (ns.ChildNodes.Count >= 4) {
104+
if (ns.ChildNodes.Length >= 4) {
96105
var childNode = ns.ChildNodes[3];
97106
//Debug.WriteLine($"{childNode.Attributes.Select(a=>a.Name + $" {a.Value}").QuickJoin()}");
98107

99108

100-
var l1 = childNode.GetAttributeValue("href", null);
109+
var l1 = ((IHtmlAnchorElement)childNode).GetAttribute("href");
101110

102111
if (l1 is not null) {
103112
ir.Url = new Uri(l1);

SmartImage.Lib/Engines/Impl/IqdbEngine.cs

Lines changed: 66 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
// ReSharper disable UnusedMember.Global
22

33
using System;
4+
using System.Collections.Generic;
45
using System.Diagnostics;
56
using System.Linq;
67
using System.Threading;
8+
using AngleSharp.Dom;
9+
using AngleSharp.Html.Dom;
10+
using AngleSharp.XPath;
711
using HtmlAgilityPack;
812
using SimpleCore.Net;
913
using SimpleCore.Utilities;
@@ -20,36 +24,49 @@ public IqdbEngine() : base("https://iqdb.org/?url=") { }
2024
public override string Name => "IQDB";
2125

2226
//public static float? FilterThreshold => 70.00F;
23-
24-
private static ImageResult ParseResult(HtmlNodeCollection tr)
27+
28+
private static ImageResult ParseResult(IHtmlCollection<IElement> tr)
2529
{
2630
var caption = tr[0];
2731
var img = tr[1];
2832
var src = tr[2];
2933

3034
string url = null!;
3135

32-
var urlNode = img.FirstChild.FirstChild;
36+
//var urlNode = img.FirstChild.FirstChild;
3337

34-
if (urlNode.Name != "img") {
35-
var origUrl = urlNode.Attributes["href"].Value;
38+
// if (urlNode.NodeName != "img") {
39+
// var origUrl = urlNode.GetAttr("href");
40+
//
41+
// // Links must begin with http:// in order to work with "start"
42+
// if (origUrl.StartsWith("//")) {
43+
// origUrl = "http:" + origUrl;
44+
// }
45+
//
46+
//
47+
// url = origUrl;
48+
// }
3649

37-
// Links must begin with http:// in order to work with "start"
38-
if (origUrl.StartsWith("//")) {
39-
origUrl = "http:" + origUrl;
40-
}
50+
//src.FirstChild.ChildNodes[2].ChildNodes[0].GetAttr("href")
4151

4252

43-
url = origUrl;
53+
try {
54+
url = src.FirstChild.ChildNodes[2].ChildNodes[0].GetAttr("href");
55+
56+
// Links must begin with http:// in order to work with "start"
57+
if (url.StartsWith("//")) {
58+
url = "http:" + url;
59+
}
4460
}
61+
catch { }
4562

4663

4764
int w = 0, h = 0;
4865

49-
if (tr.Count >= 4) {
66+
if (tr.Length >= 4) {
5067
var res = tr[3];
5168

52-
var wh = res.InnerText.Split(Formatting.MUL_SIGN);
69+
var wh = res.TextContent.Split(Formatting.MUL_SIGN);
5370

5471
var wStr = wh[0].SelectOnlyDigits();
5572
w = Int32.Parse(wStr);
@@ -62,60 +79,80 @@ private static ImageResult ParseResult(HtmlNodeCollection tr)
6279

6380
float? sim;
6481

65-
if (tr.Count >= 5) {
82+
if (tr.Length >= 5) {
6683
var simNode = tr[4];
67-
var simStr = simNode.InnerText.Split('%')[0];
84+
var simStr = simNode.TextContent.Split('%')[0];
6885
sim = Single.Parse(simStr);
6986
sim = MathF.Round(sim.Value, 2);
7087
}
7188
else {
7289
sim = null;
7390
}
7491

92+
Uri uri;
93+
94+
if (url != null) {
95+
// var uriBuilder = new UriBuilder(url)
96+
// {
97+
// Scheme = Uri.UriSchemeHttps,
98+
// Port = -1 // default port for scheme
99+
// };
100+
// uri = uriBuilder.Uri;
101+
uri = new Uri(url);
102+
}
103+
else {
104+
uri = null;
105+
}
75106

76107
//var i = new BasicSearchResult(url, sim, w, h, src.InnerText, null, caption.InnerText);
108+
109+
77110
var i = new ImageResult()
78111
{
79-
Url = url is null ? null : new Uri(url!),
112+
Url = uri,
80113
Similarity = sim,
81114
Width = w,
82115
Height = h,
83-
Source = src.InnerText,
84-
Description = caption.InnerText,
116+
Source = src.TextContent,
117+
Description = caption.TextContent,
85118
};
86119
//i.Filter = i.Similarity < FilterThreshold;
120+
87121
return i;
88122
}
89123

90-
protected override SearchResult Process(HtmlDocument doc, SearchResult sr)
124+
protected override SearchResult Process(IDocument doc, SearchResult sr)
91125
{
92-
126+
93127
//var tables = doc.DocumentNode.SelectNodes("//table");
94128

95129
// Don't select other results
96130

97-
var pages = doc.DocumentNode.SelectSingleNode("//div[@id='pages']");
98-
var tables = pages.SelectNodes("div/table");
131+
var pages = doc.Body.SelectSingleNode("//div[@id='pages']");
132+
var tables = ((IHtmlElement) pages).SelectNodes("div/table");
99133

100134
// No relevant results?
101135

102-
bool noMatch = pages.ChildNodes.Any(n => n.GetAttributeValue("class", null) == "nomatch");
136+
//bool noMatch = pages.ChildNodes.Any(n => (n).GetAttr("class") == "nomatch");
137+
var ns = doc.Body.QuerySelector("#pages > div.nomatch");
103138

104-
if (noMatch) {
139+
if (ns != null) {
105140
//sr.ExtendedInfo.Add("No relevant results");
106-
141+
107142
// No relevant results
108-
143+
109144

110145
sr.Status = ResultStatus.NoResults;
111146

112147
return sr;
113148
}
114149

115-
var images = tables.Select(table => table.SelectNodes("tr"))
116-
.Select(ParseResult)
117-
.Cast<ImageResult>()
118-
.ToList();
150+
var select =
151+
tables.Select(table => ((IHtmlElement) table).QuerySelectorAll("table > tbody > tr:nth-child(n)"));
152+
153+
154+
var images = select.Select(ParseResult).ToList();
155+
119156

120157
// First is original image
121158
images.RemoveAt(0);

0 commit comments

Comments
 (0)