Skip to content

Commit f09b02e

Browse files
committed
Image scanning
1 parent f7f2b32 commit f09b02e

File tree

4 files changed

+139
-9
lines changed

4 files changed

+139
-9
lines changed

SmartImage.Lib/Engines/Impl/IqdbEngine.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ private IDocument GetDocument(ImageQuery query)
123123
rq.AddHeader("Content-Type", "multipart/form-data");
124124

125125
byte[] fileBytes = Array.Empty<byte>();
126-
object uri = string.Empty;
126+
object uri = String.Empty;
127127

128128
if (query.IsFile) {
129129
fileBytes = File.ReadAllBytes(query.Value);

SmartImage.Lib/Utilities/ImageHelper.cs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using System.Diagnostics;
44
using System.Linq;
55
using System.Text.RegularExpressions;
6+
using System.Threading;
67
using System.Threading.Tasks;
78
using AngleSharp;
89
using AngleSharp.Dom;
@@ -12,6 +13,7 @@
1213
using AngleSharp.XPath;
1314
using Newtonsoft.Json.Linq;
1415
using SimpleCore.Net;
16+
using SimpleCore.Utilities;
1517
using static SimpleCore.Diagnostics.LogCategories;
1618
using MimeType = SimpleCore.Net.MimeType;
1719

@@ -99,6 +101,100 @@ public static bool IsDirect2(string url)
99101
RegexOptions.IgnoreCase);
100102
}
101103

104+
/// <summary>
/// Breaks a sequence into consecutive chunks of at most <paramref name="chunksize"/> items.
/// </summary>
/// <typeparam name="T">Type of the items in the sequence.</typeparam>
/// <param name="source">Sequence to split. It is enumerated exactly once.</param>
/// <param name="chunksize">Maximum number of items per chunk; must be greater than zero.</param>
/// <returns>
/// The chunks in source order. Every chunk holds <paramref name="chunksize"/> items except
/// possibly the last one, which holds the remainder. An empty source yields no chunks.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="chunksize"/> is zero or negative.</exception>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
    if (source == null) {
        throw new ArgumentNullException(nameof(source));
    }

    if (chunksize <= 0) {
        // A non-positive size can never consume the source, so the split would never finish.
        throw new ArgumentOutOfRangeException(nameof(chunksize), chunksize,
                                              "Chunk size must be greater than zero.");
    }

    // The iterator lives in a local function so the argument checks above run at call time
    // instead of being deferred until the first chunk is requested.
    return Iterate();

    IEnumerable<IEnumerable<T>> Iterate()
    {
        var bucket = new List<T>();

        // Single pass: fill a bucket, hand it out when full, start a new one.
        foreach (T item in source) {
            bucket.Add(item);

            if (bucket.Count == chunksize) {
                yield return bucket;
                bucket = new List<T>();
            }
        }

        // Trailing partial chunk, if any.
        if (bucket.Count > 0) {
            yield return bucket;
        }
    }
}
114+
115+
/// <summary>
/// Downloads the page at <paramref name="url"/> and collects the <c>href</c> values of its
/// <c>a</c> elements and the <c>src</c> values of its <c>img</c> elements that pass
/// <see cref="IsDirect2"/>.
/// </summary>
/// <remarks>
/// Candidate links are split into batches of 10 and each batch is checked on its own task, so the
/// order of the returned links is not deterministic. The method blocks until every batch is done;
/// an exception thrown inside a batch surfaces from <see cref="Task.WaitAll(Task[])"/> as an
/// <see cref="AggregateException"/>.
/// </remarks>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// The links that passed all checks, or an empty list when the page could not be downloaded.
/// </returns>
public static List<string> FindDirectImagesEx(string url)
{
    var rg = new List<string>();

    string html;

    try {
        html = WebUtilities.GetString(url);
    }
    catch (Exception e) {
        Debug.WriteLine($"{e.Message}", C_ERROR);

        // Hand back the empty list rather than null so callers can enumerate the result directly.
        return rg;
    }

    var parser   = new HtmlParser();
    var document = parser.ParseDocument(html);

    // Anchor targets first, then image sources. Elements without the attribute give null,
    // which can never be a usable link, so those are dropped before the network checks.
    var flat = document.QuerySelectorAll("a").Select(e => e.GetAttribute("href"))
                       .Concat(document.QuerySelectorAll("img").Select(e => e.GetAttribute("src")))
                       .Where(s => !string.IsNullOrWhiteSpace(s))
                       .ToList();

    var seg = flat.Chunk(10).ToArray();
    var trg = new List<Task>(seg.Length);

    // List<T> is not safe for concurrent writers; every task adds to rg under this lock.
    var gate = new object();

    foreach (var batch in seg) {
        trg.Add(Task.Run(() =>
        {
            Debug.WriteLine("Init");

            foreach (string s in batch) {
                // NOTE(review): IsUriAlive looks like a reachability probe with a one-second
                // timeout - confirm against SimpleCore.Net.Network.
                if (!Network.IsUri(s, out var u2) || !Network.IsUriAlive(u2, TimeSpan.FromSeconds(1))) {
                    continue;
                }

                if (IsDirect2(s)) {
                    lock (gate) {
                        rg.Add(s);
                    }

                    Debug.WriteLine($">>>{s}");
                }
            }
        }));
    }

    Task.WaitAll(trg.ToArray());

    return rg;
}
197+
102198

103199
/// <summary>
104200
/// Scans for direct image links in <paramref name="url"/>

Test/Program.cs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,17 +141,23 @@ public static async Task Main(string[] args)
141141
}*/
142142

143143

144-
var i = new IqdbEngine();
145-
var i2 = i.GetResultAsync(q);
146-
var r2 = await i2;
144+
//var i = new IqdbEngine();
145+
//var i2 = i.GetResultAsync(q);
146+
//var r2 = await i2;
147147

148-
Console.WriteLine(">> {0}", r2);
148+
//Console.WriteLine(">> {0}", r2);
149149

150-
var ix = new SauceNaoEngine() { };
151-
var i2x = ix.GetResultAsync(q);
152-
var r2x = await i2x;
150+
//var ix = new SauceNaoEngine() { };
151+
//var i2x = ix.GetResultAsync(q);
152+
//var r2x = await i2x;
153153

154-
Console.WriteLine(">> {0}", r2x);
154+
//Console.WriteLine(">> {0}", r2x);
155+
156+
var t = ImageHelper.FindDirectImagesEx("https://www.zerochan.net/2750747");
157+
158+
foreach (var s in t) {
159+
Console.WriteLine(s);
160+
}
155161
}
156162
}
157163
}

UnitTest/UnitTest1.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,34 @@ public async Task TestSauceNao(string art, string name)
8484
Assert.True(a || b);
8585

8686

87+
}
88+
89+
/// <summary>
/// Runs an <see cref="IqdbEngine"/> search for the image file at <paramref name="art"/> and
/// expects either a non-primitive result or at least one other result with a detail score
/// of 3 or more and a known site.
/// </summary>
/// <param name="art">Path of the image file to look up.</param>
[Test]
[TestCase(@"C:\Users\Deci\Pictures\Test Images\Test1.jpg")]
[TestCase(@"C:\Users\Deci\Pictures\Test Images\Test2.jpg")]
public async Task TestIqdb(string art)
{
    var query  = new ImageQuery(art);
    var engine = new IqdbEngine();

    var result = await engine.GetResultAsync(query);

    // An unavailable engine tells us nothing about result quality.
    if (result.Status == ResultStatus.Unavailable) {
        Assert.Inconclusive();
    }

    var primaryIsUseful = result.IsNonPrimitive;
    var anyOtherIsUseful = result.OtherResults.Any(r => r.DetailScore >= 3 && r.Site != null);

    Assert.True(primaryIsUseful || anyOtherIsUseful);
}

0 commit comments

Comments
 (0)