66using System . IO ;
77using System . Linq ;
88using System . Net ;
9+ using System . Net . Http ;
910using System . Text . RegularExpressions ;
1011using System . Threading ;
1112using System . Threading . Tasks ;
@@ -98,7 +99,6 @@ public static string Download(Uri src, string path)
9899 // using var h = new HttpClient();
99100 // h.DownloadFile(src.ToString(), combine);
100101
101-
102102 return combine ;
103103 }
104104 catch ( Exception e ) {
@@ -124,41 +124,49 @@ public static async Task<List<DirectImage>> ScanForImages(string url, int count
124124 document = WebUtilities . GetHtmlDocument ( url ) ;
125125 }
126126 catch ( Exception e ) {
127- Debug . WriteLine ( $ "{ nameof ( ImageHelper ) } : { e . Message } ", C_ERROR ) ;
127+ Debug . WriteLine ( $ "{ nameof ( WebUtilities ) } : { e . Message } ", C_ERROR ) ;
128128
129129 return null ;
130130 }
131131
132- using var cts = new CancellationTokenSource ( ) ;
133- var flat = new List < string > ( ) ;
132+ using var cts = new CancellationTokenSource ( ) ;
133+
134+ var urls = new List < string > ( ) ;
134135
135- flat . AddRange ( document . QuerySelectorAttributes ( "a" , "href" ) ) ;
136- flat . AddRange ( document . QuerySelectorAttributes ( "img" , "src" ) ) ;
136+ urls . AddRange ( document . QuerySelectorAttributes ( "a" , "href" ) ) ;
137+ urls . AddRange ( document . QuerySelectorAttributes ( "img" , "src" ) ) ;
138+
139+ urls = urls . Where ( x => x != null ) . Select ( u1 =>
140+ {
141+ if ( UriUtilities . IsUri ( u1 , out var u2 ) ) {
142+ return UriUtilities . NormalizeUrl ( u2 ) ;
143+ }
137144
138- flat = flat . Where ( x=> x != null ) . Distinct ( ) . ToList ( ) ;
145+ return u1 ;
146+ } ) . Distinct ( ) . ToList ( ) ;
139147
140- var tasks = new List < Task < DirectImage > > ( ) ;
141148
149+ var tasks = new List < Task < DirectImage > > ( ) ;
142150 var hostComponent = UriUtilities . GetHostComponent ( new Uri ( url ) ) ;
143151
144152 switch ( hostComponent ) {
145153 case "www.deviantart.com" :
146154 //https://images-wixmp-
147- flat = flat . Where ( x => x . StartsWith ( "https://images-wixmp" ) ) . ToList ( ) ;
155+ urls = urls . Where ( x => x . StartsWith ( "https://images-wixmp" ) ) . ToList ( ) ;
148156 break ;
149157 default :
150158 break ;
151159 }
152-
153160
154- for ( int i = 0 ; i < flat . Count ; i ++ ) {
161+
162+ for ( int i = 0 ; i < urls . Count ; i ++ ) {
155163 int iCopy = i ;
156164
157165 tasks . Add ( Task < DirectImage > . Factory . StartNew ( ( ) =>
158166 {
159- string s = flat [ iCopy ] ;
167+ string s = urls [ iCopy ] ;
160168
161- if ( IsImage ( s , ( int ) timeoutMS , DirectImageCriterion . Binary , out var di ) ) {
169+ if ( IsImage ( s , ( int ) timeoutMS , out var di ) ) {
162170 return di ;
163171 }
164172
@@ -174,7 +182,7 @@ public static async Task<List<DirectImage>> ScanForImages(string url, int count
174182 var result = task . Result ;
175183
176184 if ( result is { } && count > 0 ) {
177- result . Url = new Uri ( UriUtilities . NormalizeUrl ( result . Url ) ) ;
185+ // result.Url = new Uri(UriUtilities.NormalizeUrl(result.Url));
178186 images . Add ( result ) ;
179187 count -- ;
180188 }
@@ -184,68 +192,54 @@ public static async Task<List<DirectImage>> ScanForImages(string url, int count
184192 return images ;
185193 }
186194
187- public static bool IsImage ( string url , out DirectImage di , DirectImageCriterion directCriterion = DirectImageCriterion . Binary )
188- => IsImage ( url , TimeoutMS , directCriterion , out di ) ;
189-
190-
191- public static bool IsImage ( string url , long timeout , DirectImageCriterion directCriterion , out DirectImage di )
192- {
193- di = new DirectImage ( ) { } ;
194-
195- switch ( directCriterion ) {
196- case DirectImageCriterion . Regex :
197- var image = Regex . IsMatch (
198- url ,
199- @"(?:([^:\/?#]+):)?(?:\/\/([^\/?#]*))?([^?#]*\.(?:bmp|gif|ico|jfif|jpe?g|png|svg|tiff?|webp))(?:\?([^#]*))?(?:#(.*))?" ,
200- RegexOptions . IgnoreCase ) ;
201- di . Url = new Uri ( url ) ;
202-
203- return image ;
204- case DirectImageCriterion . Binary :
205- if ( ! UriUtilities . IsUri ( url , out var u ) ) {
206- return false ;
207- }
208-
209- var response = HttpUtilities . GetResponse ( u . ToString ( ) , ( int ) timeout , Method . HEAD ) ;
210-
211- if ( ! response . IsSuccessful ) {
195+ public static bool IsImage ( string url , out DirectImage di ) => IsImage ( url , TimeoutMS , out di ) ;
212196
213- return false ;
214- }
215197
216- di . Url = new Uri ( url ) ;
198+ public static bool IsImage ( string url , long timeout , out DirectImage di )
199+ {
200+ di = new DirectImage ( ) { } ;
217201
218- di . Response = response ;
202+ var response = HttpUtilities . GetResponse ( url , ( int ) timeout , Method . HEAD ) ;
219203
220- /* Check content-type */
204+ if ( ! response . IsSuccessful ) {
205+ return false ;
206+ }
221207
222- // The content-type returned from the response may not be the actual content-type, so
223- // we'll resolve it using binary data instead to be sure
224- bool a , b ;
208+ di . Url = new Uri ( url ) ;
209+ di . Response = response ;
225210
226- try {
227- var stream = WebUtilities . GetStream ( url ) ;
228- var buffer = new byte [ 256 ] ;
229- stream . Read ( buffer , 0 , buffer . Length ) ;
230- // var rg = response.RawBytes;
231- var m = MediaTypes . ResolveFromData ( buffer ) ;
232- a = m . StartsWith ( "image" ) && m != "image/svg+xml" ;
233- b = response . ContentLength is - 1 or >= 50_000 ;
234- di . Stream = stream ;
235- }
236- catch {
237- a = response . ContentType . StartsWith ( "image" ) && response . ContentType != "image/svg+xml" ;
238- b = response . ContentLength >= 50_000 ;
239- }
211+ /* Check content-type */
240212
213+ // The content-type returned from the response may not be the actual content-type, so
214+ // we'll resolve it using binary data instead to be sure
215+ bool type , size ;
241216
242- // var b = stream.Length >= 50_000;
217+ const string svg_xml = "image/svg+xml" ;
218+ const string image = "image" ;
219+ const int min_size_b = 50_000 ;
243220
244- return a && b ;
245- default :
246- throw new ArgumentOutOfRangeException ( nameof ( directCriterion ) , directCriterion , null ) ;
221+ try {
222+ using var client = new HttpClient ( ) ;
223+ var task = client . GetStreamAsync ( url ) ;
224+ task . Wait ( ( int ) timeout ) ;
225+
226+ var stream = task . Result ;
227+
228+ var buffer = new byte [ 256 ] ;
229+ stream . Read ( buffer , 0 , buffer . Length ) ;
230+ var m = MediaTypes . ResolveFromData ( buffer ) ;
231+ type = m . StartsWith ( image ) && m != svg_xml ;
232+ size = response . ContentLength is - 1 or >= min_size_b ;
233+ di . Stream = stream ;
234+ }
235+ catch ( Exception x ) {
236+ type = response . ContentType . StartsWith ( image ) && response . ContentType != svg_xml ;
237+ size = response . ContentLength >= min_size_b ;
238+ Debug . WriteLine ( $ "{ x . Message } ") ;
247239 }
248240
241+ return type && size ;
242+
249243 }
250244
251245 /*
@@ -339,12 +333,6 @@ public static Bitmap ResizeImage(Bitmap mg, Size newSize)
339333 }
340334}
341335
342- public enum DirectImageCriterion
343- {
344- Binary ,
345- Regex
346- }
347-
348336public enum DisplayResolutionType
349337{
350338 Unknown ,
0 commit comments