@@ -181,6 +181,48 @@ protected override async Task<Page> DowloadContent(Request request, ISpider spid
181181 }
182182 }
183183
/// <summary>
/// Reads the body of <paramref name="response"/> as bytes and decodes it into text.
/// </summary>
/// <param name="site">Site settings. A non-blank <c>EncodingName</c> selects <c>site.Encoding</c> for decoding.</param>
/// <param name="response">The HTTP response whose content is read.</param>
/// <returns>The decoded body text.</returns>
protected virtual string ReadContent(Site site, HttpResponseMessage response)
{
    // This method is synchronous, so it blocks on the asynchronous read via .Result.
    byte[] rawBody = response.Content.ReadAsByteArrayAsync().Result;
    byte[] body = PreventCutOff(rawBody);

    // An encoding configured on the site is used directly.
    if (!string.IsNullOrWhiteSpace(site.EncodingName))
    {
        return site.Encoding.GetString(body, 0, body.Length);
    }

    // No encoding configured: ask EncodingExtensions for one, passing the Content-Type
    // charset (null when the response has no Content-Type header) and the body bytes.
    string declaredCharset = response.Content.Headers.ContentType?.CharSet;
    Encoding resolved = EncodingExtensions.GetEncoding(declaredCharset, body);
    return resolved.GetString(body, 0, body.Length);
}
199+
/// <summary>
/// Builds a <see cref="Page"/> for <paramref name="request"/> from the text of <paramref name="response"/>.
/// </summary>
/// <param name="request">The request the page is created for.</param>
/// <param name="response">The HTTP response whose body becomes the page content.</param>
/// <param name="site">Site settings, used to pick the encoding.</param>
/// <returns>A new page whose <c>Content</c> is the (optionally HTML- and URL-decoded) body text.</returns>
private Page HandleResponse(Request request, HttpResponseMessage response, Site site)
{
    string content = ReadContent(site, response);

    if (_decodeHtml)
    {
#if NET45
        // Use the same "no encoding configured" test as ReadContent (IsNullOrWhiteSpace),
        // so a whitespace-only EncodingName is treated as unset in both places.
        Encoding urlEncoding = string.IsNullOrWhiteSpace(site.EncodingName) ? Encoding.Default : site.Encoding;
        content = HttpUtility.UrlDecode(HttpUtility.HtmlDecode(content), urlEncoding);
#else
        content = System.Net.WebUtility.UrlDecode(System.Net.WebUtility.HtmlDecode(content));
#endif
    }

    Page page = new Page(request)
    {
        Content = content
    };

    // Copying response headers into the request extras is currently disabled:
    //foreach (var header in response.Headers)
    //{
    //    page.Request.PutExtra(header.Key, header.Value);
    //}

    return page;
}
225+
184226 private void PrepareHttpClient ( HttpClientEntry httpClientEntry )
185227 {
186228 httpClientEntry . Init ( AllowAutoRedirect , ( ) =>
@@ -278,48 +320,6 @@ private HttpRequestMessage GenerateHttpRequestMessage(Request request, Site site
278320 return httpRequestMessage ;
279321 }
280322
/// <summary>
/// Builds a <see cref="Page"/> for <paramref name="request"/> from the text of <paramref name="response"/>.
/// </summary>
/// <param name="request">The request the page is created for.</param>
/// <param name="response">The HTTP response whose body becomes the page content.</param>
/// <param name="site">Site settings, used to pick the encoding.</param>
/// <returns>A new page whose <c>Content</c> is the (optionally HTML- and URL-decoded) body text.</returns>
private Page HandleResponse(Request request, HttpResponseMessage response, Site site)
{
    string content = ReadContent(site, response);

    if (_decodeHtml)
    {
#if NET45
        // Use the same "no encoding configured" test as ReadContent (IsNullOrWhiteSpace),
        // so a whitespace-only EncodingName is treated as unset in both places.
        Encoding urlEncoding = string.IsNullOrWhiteSpace(site.EncodingName) ? Encoding.Default : site.Encoding;
        content = HttpUtility.UrlDecode(HttpUtility.HtmlDecode(content), urlEncoding);
#else
        content = System.Net.WebUtility.UrlDecode(System.Net.WebUtility.HtmlDecode(content));
#endif
    }

    Page page = new Page(request)
    {
        Content = content
    };

    // Copying response headers into the request extras is currently disabled:
    //foreach (var header in response.Headers)
    //{
    //    page.Request.PutExtra(header.Key, header.Value);
    //}

    return page;
}
306-
/// <summary>
/// Reads the body of <paramref name="response"/> as bytes and decodes it into text.
/// </summary>
/// <param name="site">Site settings. A non-blank <c>EncodingName</c> selects <c>site.Encoding</c> for decoding.</param>
/// <param name="response">The HTTP response whose content is read.</param>
/// <returns>The decoded body text.</returns>
private string ReadContent(Site site, HttpResponseMessage response)
{
    // This method is synchronous, so it blocks on the asynchronous read via .Result.
    byte[] rawBody = response.Content.ReadAsByteArrayAsync().Result;
    byte[] body = PreventCutOff(rawBody);

    // An encoding configured on the site is used directly.
    if (!string.IsNullOrWhiteSpace(site.EncodingName))
    {
        return site.Encoding.GetString(body, 0, body.Length);
    }

    // No encoding configured: ask EncodingExtensions for one, passing the Content-Type
    // charset (null when the response has no Content-Type header) and the body bytes.
    string declaredCharset = response.Content.Headers.ContentType?.CharSet;
    Encoding resolved = EncodingExtensions.GetEncoding(declaredCharset, body);
    return resolved.GetString(body, 0, body.Length);
}
322-
323323 private Page SaveFile ( Request request , HttpResponseMessage response , ISpider spider )
324324 {
325325 var intervalPath = new Uri ( request . Url ) . LocalPath . Replace ( "//" , "/" ) . Replace ( "/" , Env . PathSeperator ) ;
0 commit comments