Skip to content

Commit 2c2332b

Browse files
committed
Re-add SauceNao HTML parsing
1 parent cde5030 commit 2c2332b

File tree

4 files changed

+203
-75
lines changed

4 files changed

+203
-75
lines changed
Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,36 @@
1-
using System.Drawing;
1+
using System;
2+
using System.Diagnostics;
3+
using System.Drawing;
4+
using System.IO;
5+
using System.Linq;
6+
using System.Net;
7+
using System.Net.Http;
8+
using System.Text.RegularExpressions;
9+
using System.Threading.Tasks;
10+
using HtmlAgilityPack;
11+
using RestSharp;
12+
using SimpleCore.Net;
13+
using SmartImage.Searching;
214

315
namespace SmartImage.Engines.Other
416
{
517
/// <summary>
/// Search engine entry for Bing's search-by-image page.
/// </summary>
/// <remarks>
/// Result parsing is not implemented; both overrides simply defer to the base class.
/// </remarks>
public sealed class BingEngine : SearchEngine
{
    public BingEngine() : base("https://www.bing.com/images/searchbyimage?cbir=sbi&imgurl=") { }

    public override SearchEngineOptions Engine => SearchEngineOptions.Bing;

    public override string Name => "Bing";

    public override Color Color => Color.DodgerBlue;

    // Parsing does not seem feasible ATM

    /// <summary>Returns the base engine's result for <paramref name="url"/> unchanged.</summary>
    public override FullSearchResult GetResult(string url) => base.GetResult(url);

    /// <summary>Returns the base engine's raw result URL for <paramref name="url"/> unchanged.</summary>
    public override string GetRawResultUrl(string url) => base.GetRawResultUrl(url);
}
1236
}

SmartImage/Engines/SauceNao/SauceNaoEngine.cs

Lines changed: 170 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
using System.Drawing;
55
using System.Json;
66
using System.Linq;
7+
using System.Net;
8+
using HtmlAgilityPack;
79
using RestSharp;
810
using SimpleCore.Net;
911
using SimpleCore.Utilities;
@@ -13,15 +15,17 @@
1315
using JsonObject = System.Json.JsonObject;
1416

1517
#nullable enable
18+
19+
// ReSharper disable CommentTypo
20+
// ReSharper disable IdentifierTypo
1621
// ReSharper disable InconsistentNaming
1722
// ReSharper disable ParameterTypeCanBeEnumerable.Local
18-
#pragma warning disable HAA0502, HAA0601, HAA0102, HAA0401
23+
1924
namespace SmartImage.Engines.SauceNao
2025
{
2126
// https://github.com/RoxasShadow/SauceNao-Windows
2227
// https://github.com/LazDisco/SharpNao
2328

24-
// NOTE: It seems that the SauceNao API works regardless of whether or not an API key is used
2529

2630
/// <summary>
2731
/// SauceNao API client
@@ -55,94 +59,121 @@ public SauceNaoEngine() : this(SearchConfig.Config.SauceNaoAuth) { }
5559

5660
public override float? FilterThreshold => 70.00F;
5761

58-
private ISearchResult[] ConvertResults(SauceNaoDataResult[] results)
62+
#region HTML
63+
64+
/// <summary>
/// Reads the creator and material text out of a result-content node.
/// </summary>
/// <param name="resultcontent">
/// Node whose first child is treated as the result title and whose second child is treated
/// as the content column.
/// </param>
/// <returns>
/// <c>Creator</c>: the title text (or, when the title is missing or blank, the content-column text)
/// passed through <c>SubstringAfter("Creator: ")</c>.
/// <c>Material</c>: the content-column text passed through <c>SubstringAfter("Material: ")</c>.
/// Either element is <c>null</c> when its source node does not exist.
/// </returns>
private static (string? Creator, string? Material) FindCreator(HtmlNode resultcontent)
{
    // ElementAtOrDefault returns null for a missing child, whereas the collection indexer
    // throws; this is what makes the null-conditional reads below effective.
    var titleNode = resultcontent.ChildNodes.ElementAtOrDefault(0);
    string? titleText = titleNode?.InnerText;

    var columnNode = resultcontent.ChildNodes.ElementAtOrDefault(1);
    string? columnText = columnNode?.InnerText;

    // NOTE(review): SubstringAfter is an extension defined outside this file; its behavior
    // when the marker is absent should be confirmed.
    string? material = columnText?.SubstringAfter("Material: ");

    // Prefer the title; fall back to the content column when the title is missing or blank
    string? creator = String.IsNullOrWhiteSpace(titleText) ? columnText : titleText;
    creator = creator?.SubstringAfter("Creator: ");

    return (creator, material);
}
7987

8088

81-
public override FullSearchResult GetResult(string url)
89+
/// <summary>
/// Downloads the HTML results page for <paramref name="url"/> and scrapes one
/// <see cref="SauceNaoDataResult"/> per result entry.
/// </summary>
/// <param name="url">Image URL; appended to <c>BASIC_RESULT</c> to build the page address.</param>
/// <returns>
/// The scraped results in document order; empty when the page has no
/// <c>div</c> elements with class <c>result</c>.
/// </returns>
/// <remarks>
/// The node walk below relies on fixed child positions and will throw if the page layout differs.
/// </remarks>
private static IEnumerable<SauceNaoDataResult> ParseResults(string url)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(Network.GetString(BASIC_RESULT + url));

    // todo: improve

    var results = doc.DocumentNode.SelectNodes("//div[@class='result']");

    var images = new List<SauceNaoDataResult>();

    // SelectNodes yields null (not an empty collection) when nothing matches
    if (results == null) {
        return images;
    }

    foreach (var result in results) {
        // Skip the placeholder entry that is not an actual match
        if (result.GetAttributeValue("id", String.Empty) == "result-hidden-notification") {
            continue;
        }

        var n = result.FirstChild.FirstChild;

        // n.ChildNodes[0] is the image cell; only the content cell is needed
        var resulttablecontent = n.ChildNodes[1];

        var resultmatchinfo      = resulttablecontent.FirstChild;
        var resultsimilarityinfo = resultmatchinfo.FirstChild;

        // Contains links
        var resultmiscinfo = resultmatchinfo.ChildNodes[1];

        var     links1 = resultmiscinfo.SelectNodes("a/@href");
        string? link1  = links1?[0].GetAttributeValue("href", null);

        // resultcontent.ChildNodes[0] is the title; FindCreator reads it
        var resultcontent = resulttablecontent.ChildNodes[1];

        var resultcontentcolumn = resultcontent.ChildNodes[1];

        // Other way of getting links
        var     links2 = resultcontentcolumn.SelectNodes("a/@href");
        string? link2  = links2?[0].GetAttributeValue("href", null);

        string? link = link1 ?? link2;

        var (creator, material) = FindCreator(resultcontent);

        // The percentage text uses '.' as decimal separator; parse culture-independently so
        // comma-decimal locales do not misread it.
        float similarity = Single.Parse(
            resultsimilarityinfo.InnerText.Replace("%", String.Empty),
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture);

        var i = new SauceNaoDataResult
        {
            Urls       = new[] {link}!,
            Similarity = similarity,
            Creator    = creator,
            Material   = material,
        };

        Debug.WriteLine($">>> {i}");

        images.Add(i);
    }

    return images;
}
128150

151+
#endregion
152+
153+
#region API
129154

130-
private SauceNaoDataResult[]? GetResults(string url)
155+
/// <summary>
/// Maps raw SauceNao entries to <see cref="ISearchResult"/> instances.
/// </summary>
/// <param name="results">Raw entries; those whose <c>Urls</c> is <c>null</c> are left out.</param>
/// <returns>
/// One <see cref="BasicSearchResult"/> per retained entry, in input order, with <c>Filter</c>
/// set when its similarity is below <see cref="FilterThreshold"/>.
/// </returns>
private ISearchResult[] ConvertResults(SauceNaoDataResult[] results)
{
    var converted = new List<ISearchResult>();

    foreach (var entry in results) {
        if (sn_HasNoUrls(entry)) {
            continue;
        }

        // First non-null URL, if any
        string? firstUrl = entry.Urls!.FirstOrDefault(u => u != null)!;

        // An index of 0 carries no site information
        string? siteName = null;

        if (entry.Index != 0) {
            siteName = entry.Index.ToString();
        }

        var item = new BasicSearchResult(firstUrl, entry.Similarity,
            entry.WebsiteTitle, entry.Creator, entry.Material, entry.Character, siteName);

        item.Filter = item.Similarity < FilterThreshold;

        converted.Add(item);
    }

    return converted.ToArray();

    bool sn_HasNoUrls(SauceNaoDataResult candidate) => candidate.Urls == null;
}
146177

147178
private static SauceNaoDataResult[]? ReadResults(string js)
148179
{
@@ -212,37 +243,113 @@ public override FullSearchResult GetResult(string url)
212243
return null;
213244
}
214245

246+
/// <summary>
/// Queries the SauceNao JSON endpoint for <paramref name="url"/> through <c>m_client</c>.
/// </summary>
/// <param name="url">Image URL sent as the <c>url</c> query parameter.</param>
/// <returns>
/// Whatever <c>ReadResults</c> produces from the response body, or <c>null</c> when the
/// server answers with HTTP 403 (Forbidden).
/// </returns>
private SauceNaoDataResult[]? GetResults_API(string url)
{
    var request = new RestRequest();

    request.AddQueryParameter("db", "999");
    request.AddQueryParameter("output_type", "2");
    request.AddQueryParameter("numres", "16");
    request.AddQueryParameter("api_key", m_apiKey);
    request.AddQueryParameter("url", url);

    var response = m_client.Execute(request);

    // A 403 is reported to the caller as "no API results"
    return response.StatusCode == HttpStatusCode.Forbidden
        ? null
        : ReadResults(response.Content);
}
267+
268+
#endregion
269+
270+
271+
/// <summary>
/// Builds the full SauceNao result for <paramref name="url"/>, using the API first and the
/// HTML results page when the API returns nothing.
/// </summary>
/// <param name="url">Image URL to search for.</param>
/// <returns>
/// The base result, updated from the highest-similarity entry and carrying all entries as
/// extended results. If anything throws along the way, the exception message is attached to
/// the result as an error instead of propagating.
/// </returns>
public override FullSearchResult GetResult(string url)
{
    FullSearchResult fullResult = base.GetResult(url);

    try {
        SauceNaoDataResult[] data = GetResults_API(url) ?? ParseHtmlFallback();

        // aggregate all info for primary result: first non-blank value of each field
        string? character = data.Select(d => d.Character).FirstOrDefault(s => !String.IsNullOrWhiteSpace(s));
        string? creator   = data.Select(d => d.Creator).FirstOrDefault(s => !String.IsNullOrWhiteSpace(s));
        string? material  = data.Select(d => d.Material).FirstOrDefault(s => !String.IsNullOrWhiteSpace(s));

        var extended = ConvertResults(data);

        // Highest similarity among entries that actually have a URL
        var best = extended
            .Where(e => e.Url != null)
            .OrderByDescending(e => e.Similarity)
            .First();

        // Copy
        fullResult.UpdateFrom(best);

        fullResult.Characters = character;
        fullResult.Artist     = creator;
        fullResult.Source     = material;

        fullResult.AddExtendedResults(extended);

        bool hasApiKey = !String.IsNullOrWhiteSpace(m_apiKey);

        if (hasApiKey) {
            fullResult.Metadata.Add("API", m_apiKey);
        }
    }
    catch (Exception ex) {
        Debug.WriteLine($"SauceNao error: {ex.StackTrace}");
        fullResult.AddErrorMessage(ex.Message);
    }

    return fullResult;

    // Used only when the API produced no results
    SauceNaoDataResult[] ParseHtmlFallback()
    {
        Debug.WriteLine("Parsing HTML from SN!");
        return ParseResults(url).ToArray();
    }
}
321+
215322

216323
/// <summary>
/// A single SauceNao match, filled either from the API response or from the HTML results page.
/// </summary>
private class SauceNaoDataResult
{
    /// <summary>
    ///     The url(s) where the source is from. Multiple will be returned if the exact same image is found in multiple places
    /// </summary>
    public string[]? Urls { get; internal set; }

    /// <summary>
    ///     The search index of the image
    /// </summary>
    public SauceNaoSiteIndex Index { get; internal set; }

    /// <summary>
    ///     How similar is the image to the one provided (Percentage)?
    /// </summary>
    public float Similarity { get; internal set; }

    public string? WebsiteTitle { get; internal set; }

    public string? Character { get; internal set; }

    public string? Material { get; internal set; }

    public string? Creator { get; internal set; }

    /// <summary>
    /// Formats the first URL (or "-" when there is none), similarity, index and creator.
    /// </summary>
    public override string ToString()
    {
        // Guard against an empty array as well as null; indexing [0] on an empty array throws
        string firstUrl = Urls != null && Urls.Length > 0 ? Urls[0] : "-";

        return $"{firstUrl} ({Similarity}, {Index}) {Creator}";
    }
}
248355
}

SmartImage/Program.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,9 @@ private static void Main(string[] args)
6161
/*
6262
* Set up NConsole
6363
*/
64-
6564
NConsole.Init();
6665
NConsoleInterface.DefaultName = Info.NAME_BANNER;
67-
66+
6867

6968
/*
7069
* Check for any legacy integrations that need to be migrated

0 commit comments

Comments
 (0)