Skip to content

Commit f7ec54b

Browse files
committed
Direct images
1 parent 13f2f3a commit f7ec54b

File tree

16 files changed

+200
-71
lines changed

16 files changed

+200
-71
lines changed

SmartImage.Lib/Engines/BaseSearchEngine.cs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
using SmartImage.Lib.Searching;
33
using System;
44
using System.Diagnostics;
5+
using System.Linq;
56
using System.Threading.Tasks;
7+
using SmartImage.Lib.Utilities;
68
using static SimpleCore.Diagnostics.LogCategories;
79

810
namespace SmartImage.Lib.Engines
@@ -20,6 +22,7 @@ protected BaseSearchEngine(string baseUrl)
2022

2123
public virtual string Name => Engine.ToString();
2224

25+
2326
public virtual SearchResult GetResult(ImageQuery query)
2427
{
2528
var rawUrl = GetRawResultUrl(query);
@@ -34,6 +37,7 @@ public virtual SearchResult GetResult(ImageQuery query)
3437
sr.Status = ResultStatus.Success;
3538
}
3639

40+
3741
return sr;
3842
}
3943

@@ -43,11 +47,11 @@ public async Task<SearchResult> GetResultAsync(ImageQuery query)
4347

4448
var task = Task.Run(delegate
4549
{
46-
Debug.WriteLine($"{Name}: getting result async",C_INFO);
47-
50+
Debug.WriteLine($"{Name}: getting result async", C_INFO);
51+
4852
var res = GetResult(query);
49-
50-
Debug.WriteLine($"{Name}: result done",C_SUCCESS);
53+
54+
Debug.WriteLine($"{Name}: result done", C_SUCCESS);
5155

5256
return res;
5357
});
@@ -62,7 +66,7 @@ public Uri GetRawResultUrl(ImageQuery query)
6266
bool ok = Network.IsUriAlive(uri);
6367

6468
if (!ok) {
65-
Debug.WriteLine($"{uri.Host} is unavailable",C_WARN);
69+
Debug.WriteLine($"{uri.Host} is unavailable", C_WARN);
6670
return null;
6771
}
6872

SmartImage.Lib/Engines/Impl/Ascii2DEngine.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ protected override IDocument GetDocument(SearchResult sr)
3030
{
3131
var url = sr.RawUri.ToString();
3232

33-
var res = Network.GetSimpleResponse(url);
33+
var res = Network.GetResponse(url);
3434

3535
// Get redirect url (color url)
3636
var newUrl = res.ResponseUri.ToString();

SmartImage.Lib/Engines/Impl/IqdbEngine.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
using System.Diagnostics;
66
using System.Linq;
77
using System.Threading;
8+
using System.Threading.Tasks;
89
using AngleSharp.Dom;
910
using AngleSharp.Html.Dom;
1011
using AngleSharp.XPath;
1112
using SimpleCore.Net;
1213
using SimpleCore.Utilities;
1314
using SmartImage.Lib.Searching;
15+
using SmartImage.Lib.Utilities;
1416

1517
namespace SmartImage.Lib.Engines.Impl
1618
{
@@ -21,7 +23,7 @@ public IqdbEngine() : base("https://iqdb.org/?url=") { }
2123
public override SearchEngineOptions Engine => SearchEngineOptions.Iqdb;
2224

2325
public override string Name => "IQDB";
24-
26+
2527

2628
private static ImageResult ParseResult(IHtmlCollection<IElement> tr)
2729
{
@@ -34,8 +36,7 @@ private static ImageResult ParseResult(IHtmlCollection<IElement> tr)
3436
//img.ChildNodes[0].ChildNodes[0].TryGetAttribute("href")
3537

3638

37-
try
38-
{
39+
try {
3940
//url = src.FirstChild.ChildNodes[2].ChildNodes[0].TryGetAttribute("href");
4041

4142
url = img.ChildNodes[0].ChildNodes[0].TryGetAttribute("href");
@@ -79,7 +80,7 @@ private static ImageResult ParseResult(IHtmlCollection<IElement> tr)
7980
}
8081

8182
var uri = url != null ? new Uri(url) : null;
82-
83+
8384

8485
var result = new ImageResult
8586
{
@@ -91,6 +92,10 @@ private static ImageResult ParseResult(IHtmlCollection<IElement> tr)
9192
Description = caption.TextContent,
9293
};
9394

95+
96+
97+
98+
9499
return result;
95100
}
96101

SmartImage.Lib/Engines/InterpretedSearchEngine.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public abstract class InterpretedSearchEngine : BaseSearchEngine
2525

2626
protected InterpretedSearchEngine(string baseUrl) : base(baseUrl) { }
2727

28+
2829

2930
[DebuggerHidden]
3031
public override SearchResult GetResult(ImageQuery query)
@@ -60,7 +61,7 @@ protected virtual IDocument GetDocument(SearchResult sr)
6061
}*/
6162

6263

63-
string response = Network.GetString(sr.RawUri.ToString()!);
64+
string response = WebUtilities.GetString(sr.RawUri.ToString()!);
6465

6566
var parser = new HtmlParser();
6667
return parser.ParseDocument(response);

SmartImage.Lib/SearchClient.cs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,15 +130,19 @@ public async Task RunSearchAsync()
130130
var value = await finished;
131131

132132
tasks.Remove(finished);
133-
133+
134+
if (Config.DirectUri) {
135+
value.PrimaryResult.FindDirectImages();
136+
}
137+
134138
if (!(Config.Filter && !value.IsNonPrimitive)) {
135139
Results.Add(value);
136-
140+
137141
// Call event
138142
ResultCompleted?.Invoke(null, new SearchResultEventArgs(value));
139143
}
140144

141-
145+
142146
IsComplete = !tasks.Any();
143147
}
144148

SmartImage.Lib/SearchConfig.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,33 @@ namespace SmartImage.Lib
1212
/// <summary>
1313
/// Contains configuration for <see cref="SearchClient"/>
1414
/// </summary>
15+
/// <remarks>Search config is only applicable when used in <see cref="SearchClient"/></remarks>
1516
public sealed class SearchConfig
1617
{
18+
/// <summary>
19+
/// Search query
20+
/// </summary>
1721
public ImageQuery Query { get; set; }
1822

23+
/// <summary>
24+
/// Search engines to use
25+
/// </summary>
1926
public SearchEngineOptions SearchEngines { get; set; } = SearchEngineOptions.All;
2027

28+
/// <summary>
29+
/// Priority engines
30+
/// </summary>
2131
public SearchEngineOptions PriorityEngines { get; set; }
2232

33+
/// <summary>
34+
/// When enabled, only results for which <see cref="SearchResult.IsNonPrimitive"/> is <c>true</c> are kept; all other results are filtered out
35+
/// </summary>
2336
public bool Filter { get; set; } = true;
2437

38+
/// <summary>
39+
/// Scan for direct image links; <see cref="ImageResult.FindDirectImages"/>
40+
/// </summary>
41+
public bool DirectUri { get; set; } = true;
42+
2543
}
2644
}

SmartImage.Lib/Searching/ImageQuery.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public ImageQuery([NotNull] string value, [CanBeNull] BaseUploadEngine engine =
6565

6666
Uri = IsUri ? new Uri(Value) : UploadEngine.Upload(Value);
6767

68-
Stream = IsFile ? File.OpenRead(value) : Network.GetStream(value);
68+
Stream = IsFile ? File.OpenRead(value) : WebUtilities.GetStream(value);
6969

7070
Trace.WriteLine($"{nameof(ImageQuery)}: {Uri}", C_SUCCESS);
7171
}

SmartImage.Lib/Searching/ImageResult.cs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33
using SmartImage.Lib.Utilities;
44
using System;
55
using System.Collections.Generic;
6+
using System.Diagnostics;
67
using System.Drawing;
78
using System.Linq;
89
using System.Reflection;
910
using System.Text;
11+
using System.Threading.Tasks;
1012
using Novus.Utilities;
13+
using static SimpleCore.Diagnostics.LogCategories;
1114

1215
#nullable enable
1316

@@ -19,10 +22,16 @@ namespace SmartImage.Lib.Searching
1922
public sealed class ImageResult
2023
{
2124
/// <summary>
22-
/// Url
25+
/// Result url
2326
/// </summary>
2427
public Uri? Url { get; set; }
2528

29+
/// <summary>
30+
/// Direct image link of <see cref="Url"/>
31+
/// </summary>
32+
public Uri? Direct { get; set; }
33+
34+
2635
/// <summary>
2736
/// Similarity
2837
/// </summary>
@@ -167,6 +176,7 @@ public DisplayResolutionType DisplayResolution
167176
public void UpdateFrom(ImageResult result)
168177
{
169178
Url = result.Url;
179+
Direct = result.Direct;
170180
Similarity = result.Similarity;
171181
Width = result.Width;
172182
Height = result.Height;
@@ -178,12 +188,29 @@ public void UpdateFrom(ImageResult result)
178188
Date = result.Date;
179189
}
180190

191+
public void FindDirectImages()
192+
{
193+
194+
if (Url is not null) {
195+
string? images = ImageHelper.FindDirectImages(Url?.ToString()).FirstOrDefault();
196+
197+
if (images is { }) {
198+
var uri = new Uri(images);
199+
200+
Direct = uri;
201+
}
202+
203+
}
204+
205+
}
206+
181207
public string ToString(bool indent)
182208
{
183209

184210
var sb = new ExtendedStringBuilder() { };
185211

186212
sb.Append(nameof(Url), Url);
213+
sb.Append(nameof(Direct), Direct);
187214

188215
if (Similarity.HasValue) {
189216
sb.Append($"{nameof(Similarity)}", $"{Similarity.Value / 100:P}");

SmartImage.Lib/Utilities/ImageHelper.cs

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
using AngleSharp.XPath;
1414
using Newtonsoft.Json.Linq;
1515
using SimpleCore.Net;
16+
using static SimpleCore.Diagnostics.LogCategories;
1617
using MimeType = SimpleCore.Net.MimeType;
1718

1819
// ReSharper disable InconsistentNaming
@@ -43,8 +44,6 @@ public static class ImageHelper
4344
* https://github.com/regosen/gallery_get
4445
*/
4546

46-
47-
4847

4948
public static (int Width, int Height) GetResolution(string s)
5049
{
@@ -88,54 +87,76 @@ public static DisplayResolutionType GetDisplayResolution(int w, int h)
8887

8988
}
9089

91-
public static bool IsDirect(string value) => MediaTypes.IsDirect(value, MimeType.Image);
90+
/// <summary>
91+
/// Determines whether <paramref name="url"/> is a direct image link
92+
/// </summary>
93+
/// <remarks>A direct image link is a link which points to a binary image file</remarks>
94+
public static bool IsDirect(string url) => MediaTypes.IsDirect(url, MimeType.Image);
9295

93-
public static string[] Scan(string s)
96+
/// <summary>
97+
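/// Fetches the page at <paramref name="url"/> and scans its anchor <c>href</c> values for direct image links; the result may be <c>null</c> if the link check does not finish within the timeout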
98+
/// </summary>
99+
public static string[] FindDirectImages(string url)
94100
{
95-
96-
// TODO: WIP
101+
var rg = new List<string>();
97102

98103
//<img.*?src="(.*?)"
99104
//href\s*=\s*"(.+?)"
100-
101-
var html = Network.GetString(s);
102-
103105
//var src = "<img.*?src=\"(.*?)\"";
104106
//var href = "href\\s*=\\s*\"(.+?)\"";
105-
var href = "<a\\s+(?:[^>]*?\\s+)?href=\"([^\"]*)\"";
106-
//var m = Regex.Matches(html, src);
107-
var m2 = Regex.Matches(html, href);
108107

109-
//Debug.WriteLine($"{s} {m.Count} {m2.Count}");
108+
var html = WebUtilities.GetString(url);
109+
110+
const string HREF_PATTERN = "<a\\s+(?:[^>]*?\\s+)?href=\"([^\"]*)\"";
110111

112+
var m2 = Regex.Matches(html, HREF_PATTERN);
111113

112-
for (int index = 0; index < m2.Count; index++) {
113-
Match match = m2[index];
114-
var v = match.Groups;
115114

116-
for (int i = 0; i < v.Count; i++) {
117-
Group @group = v[i];
115+
for (int i = 0; i < m2.Count; i++) {
116+
var match = m2[i];
117+
var groups = match.Groups;
118118

119-
foreach (Capture capture in @group.Captures) {
120-
// this works but it's slow
121-
if (Network.IsUri(capture.Value, out var u)) {
122-
Debug.WriteLine($"[{index}, {i}] {u}");
119+
for (int j = 0; j < groups.Count; j++) {
120+
var group = groups[j];
123121

124-
}
122+
foreach (Capture capture in group.Captures) {
123+
124+
rg.Add(capture.Value);
125125
}
126126
}
127127
}
128128

129+
string[] results = null;
130+
131+
132+
var t = Task.Run(() =>
133+
{
134+
// todo: is running PLINQ within a task thread-safe?
135+
136+
results = rg.AsParallel()
137+
.Where(e => Network.IsUri(e, out _) && IsDirect(e))
138+
.ToArray();
139+
140+
Debug.WriteLine($"{nameof(FindDirectImages)}: {rg.Count} -> {results.Length}", C_DEBUG);
141+
});
142+
143+
144+
var timeout = TimeSpan.FromSeconds(3);
145+
146+
if (t.Wait(timeout)) {
147+
//
148+
}
149+
else {
150+
Debug.WriteLine($"{nameof(FindDirectImages)}: timeout!", C_WARN);
151+
}
129152

130-
var rg = new List<string>();
131153

132-
return rg.ToArray();
154+
return results;
133155
}
134156

135157

136158
/*public static string ResolveDirectLink(string s)
137159
{
138-
//todo: WIP
139160
string d = "";
140161
141162
try {

0 commit comments

Comments
 (0)