@@ -16,30 +16,67 @@ public class BrowscapXmlHelper
1616 {
1717 private readonly List < string > _crawlerUserAgentsRegexp ;
1818
/// <summary>
/// Creates the helper: allocates the empty list of crawler user agent
/// patterns and then fills it by calling <c>Initialize</c>.
/// </summary>
/// <param name="userAgentStringsPath">Path to the full user agent strings file</param>
/// <param name="crawlerOnlyUserAgentStringsPath">Path to the file that holds crawler entries only</param>
public BrowscapXmlHelper(string userAgentStringsPath, string crawlerOnlyUserAgentStringsPath)
{
    // the list must exist before Initialize appends to it
    _crawlerUserAgentsRegexp = new List<string>();

    Initialize(userAgentStringsPath, crawlerOnlyUserAgentStringsPath);
}
2525
/// <summary>
/// Loads the crawler "browscapitem" entries and appends one pattern per entry name
/// (converted by <c>ToRegexp</c>) to the crawler user agent list.
/// The crawlers-only file is used when a path is given and the file exists;
/// otherwise the full user agents file is read and filtered with
/// <c>IsBrowscapItemIsCrawler</c>. When a crawlers-only path is given but the file
/// does not exist yet, it is written from the loaded entries.
/// </summary>
/// <param name="userAgentStringsPath">Path to the full user agent strings XML file</param>
/// <param name="crawlerOnlyUserAgentStringsPath">Path to the crawlers-only XML file; may be null or empty</param>
/// <exception cref="Exception">Thrown when no crawler item list could be read from the file</exception>
private void Initialize(string userAgentStringsPath, string crawlerOnlyUserAgentStringsPath)
{
    List<XElement> crawlerItems = null;

    if (!string.IsNullOrEmpty(crawlerOnlyUserAgentStringsPath) && File.Exists(crawlerOnlyUserAgentStringsPath))
    {
        //try to load crawler list from crawlers only file
        //(here the "browscapitem" elements are direct children of the root)
        using (var sr = new StreamReader(crawlerOnlyUserAgentStringsPath))
        {
            crawlerItems = XDocument.Load(sr).Root.Return(x => x.Elements("browscapitem").ToList(), null);
        }
    }

    if (crawlerItems == null)
    {
        //try to load crawler list from full user agents file
        using (var sr = new StreamReader(userAgentStringsPath))
        {
            crawlerItems = XDocument.Load(sr).Root.Return(x => x.Element("browsercapitems"), null)
                //only crawlers
                .Return(x => x.Elements("browscapitem").Where(IsBrowscapItemIsCrawler).ToList(), null);
        }
    }

    if (crawlerItems == null)
        throw new Exception("Incorrect file format");

    _crawlerUserAgentsRegexp.AddRange(crawlerItems
        //get only user agent names
        .Select(e => e.Attribute("name"))
        .Where(e => e != null && !string.IsNullOrEmpty(e.Value))
        .Select(e => e.Value)
        .Select(ToRegexp));

    //nothing to write when no crawlers file was requested or it already exists
    if (string.IsNullOrEmpty(crawlerOnlyUserAgentStringsPath) || File.Exists(crawlerOnlyUserAgentStringsPath))
        return;

    //build the whole output tree before the file is created, so that a failure
    //while building cannot leave an empty crawlers file behind
    var root = new XElement("browsercapitems");

    foreach (var crawler in crawlerItems)
    {
        //keep only the child element named "crawler"; ToList() takes a snapshot
        //because children are removed while iterating
        foreach (var element in crawler.Elements().ToList())
        {
            var elementName = element.Attribute("name").Return(x => x.Value, string.Empty);

            //ordinal comparison: the result must not depend on the current culture
            if (string.Equals(elementName, "crawler", StringComparison.OrdinalIgnoreCase))
                continue;

            element.Remove();
        }

        //the item is still attached to the loaded document, so Add stores a copy
        root.Add(crawler);
    }

    //try to write crawlers file
    using (var sw = new StreamWriter(crawlerOnlyUserAgentStringsPath))
    {
        root.Save(sw);
    }
}
4582
0 commit comments