|
13 | 13 | using AngleSharp.XPath; |
14 | 14 | using Newtonsoft.Json.Linq; |
15 | 15 | using SimpleCore.Net; |
| 16 | +using static SimpleCore.Diagnostics.LogCategories; |
16 | 17 | using MimeType = SimpleCore.Net.MimeType; |
17 | 18 |
|
18 | 19 | // ReSharper disable InconsistentNaming |
@@ -43,8 +44,6 @@ public static class ImageHelper |
43 | 44 | * https://github.com/regosen/gallery_get |
44 | 45 | */ |
45 | 46 |
|
46 | | - |
47 | | - |
48 | 47 |
|
49 | 48 | public static (int Width, int Height) GetResolution(string s) |
50 | 49 | { |
@@ -88,54 +87,76 @@ public static DisplayResolutionType GetDisplayResolution(int w, int h) |
88 | 87 |
|
89 | 88 | } |
90 | 89 |
|
/// <summary>
/// Checks whether <paramref name="url"/> is a direct image link.
/// </summary>
/// <remarks>
/// A direct image link is a link which points to a binary image file.
/// The decision is delegated to <c>MediaTypes.IsDirect</c> with <c>MimeType.Image</c>.
/// </remarks>
/// <param name="url">Link to test</param>
/// <returns>The result of <c>MediaTypes.IsDirect</c> for the image MIME type</returns>
public static bool IsDirect(string url)
{
    return MediaTypes.IsDirect(url, MimeType.Image);
}
92 | 95 |
|
/// <summary>
/// Scans the page at <paramref name="url"/> for direct image links.
/// </summary>
/// <remarks>
/// The page is fetched with <c>WebUtilities.GetString</c>, the <c>href</c> value of every
/// <c>&lt;a&gt;</c> tag is extracted, and the candidates are filtered in parallel with
/// <c>Network.IsUri</c> and <see cref="IsDirect"/>. The filtering step is given 3 seconds;
/// this method blocks the calling thread for at most that long while waiting for it.
/// </remarks>
/// <param name="url">Address of the HTML page to scan</param>
/// <returns>
/// The direct image links that were found. An empty array is returned when the page has no
/// content, no link qualifies, or the filtering step times out; the result is never <c>null</c>.
/// </returns>
public static string[] FindDirectImages(string url)
{
    const string HREF_PATTERN = "<a\\s+(?:[^>]*?\\s+)?href=\"([^\"]*)\"";

    var html = WebUtilities.GetString(url);

    // NOTE(review): assumes GetString may yield null/empty on failure - confirm against WebUtilities.
    // Regex.Matches throws on a null input, so bail out early instead.
    if (String.IsNullOrEmpty(html)) {
        return Array.Empty<string>();
    }

    // Group 1 is the captured href value. Group 0 is the whole "<a ... href=..." text and
    // can never be a link, so it is not queued as a candidate.
    var candidates = Regex.Matches(html, HREF_PATTERN)
                          .Cast<Match>()
                          .Select(m => m.Groups[1].Value)
                          .ToList();

    // The query only reads `candidates`, and its result is handed back through the task
    // itself rather than through a captured local, so no state is shared between threads.
    var filter = Task.Run(() => candidates.AsParallel()
                                          .Where(e => Network.IsUri(e, out _) && IsDirect(e))
                                          .ToArray());

    var timeout = TimeSpan.FromSeconds(3);

    if (!filter.Wait(timeout)) {
        // Waiting stops here, but the task is not cancelled: the query keeps running in the
        // background and its result is discarded.
        Debug.WriteLine($"{nameof(FindDirectImages)}: timeout!", C_WARN);

        return Array.Empty<string>();
    }

    var results = filter.Result;

    Debug.WriteLine($"{nameof(FindDirectImages)}: {candidates.Count} -> {results.Length}", C_DEBUG);

    return results;
}
134 | 156 |
|
135 | 157 |
|
136 | 158 | /*public static string ResolveDirectLink(string s) |
137 | 159 | { |
138 | | - //todo: WIP |
139 | 160 | string d = ""; |
140 | 161 |
|
141 | 162 | try { |
|
0 commit comments