@@ -10,6 +10,8 @@ namespace PuppeteerSharp
1010{
1111 internal class CustomQueriesManager
1212 {
13+ private static readonly string [ ] CustomQuerySeparators = new [ ] { "=" , "/" } ;
14+ private readonly Dictionary < string , PuppeteerQueryHandler > _internalQueryHandlers ;
1315 private readonly Dictionary < string , PuppeteerQueryHandler > _queryHandlers = new ( ) ;
1416 private readonly PuppeteerQueryHandler _pierceHandler = CreatePuppeteerQueryHandler ( new CustomQueryHandler
1517 {
@@ -62,9 +64,7 @@ internal class CustomQueriesManager
6264 } ) ;
6365
6466 private readonly PuppeteerQueryHandler _ariaHandler = AriaQueryHandlerFactory . Create ( ) ;
65- private readonly Dictionary < string , PuppeteerQueryHandler > _builtInHandlers ;
6667 private readonly Regex _customQueryHandlerNameRegex = new ( "[a-zA-Z]+$" , RegexOptions . Compiled ) ;
67- private readonly Regex _customQueryHandlerParserRegex = new ( "(?<query>^[a-zA-Z]+)\\ /(?<selector>.*)" , RegexOptions . Compiled ) ;
6868 private readonly PuppeteerQueryHandler _defaultHandler = CreatePuppeteerQueryHandler ( new CustomQueryHandler
6969 {
7070 QueryOne = "(element, selector) => element.querySelector(selector)" ,
@@ -124,19 +124,54 @@ internal class CustomQueriesManager
124124 }" ,
125125 } ) ;
126126
// Built-in query handler that resolves XPath expressions; it is registered under
// the "xpath" name in the constructor.
//  - QueryOne evaluates the expression relative to `element` and returns the first
//    matching node in document order (XPathResult.FIRST_ORDERED_NODE_TYPE).
//  - QueryAll evaluates the expression with an ordered node iterator and collects
//    every node the iterator yields into an array.
// Both scripts evaluate against element.ownerDocument, falling back to the global
// `document` when ownerDocument is not set.
private readonly PuppeteerQueryHandler _xpathHandler = CreatePuppeteerQueryHandler(new CustomQueryHandler
{
    QueryOne = @"(element, selector) => {
        const doc = element.ownerDocument || document;
        const result = doc.evaluate(
            selector,
            element,
            null,
            XPathResult.FIRST_ORDERED_NODE_TYPE
        );
        return result.singleNodeValue;
    }",
    QueryAll = @"(element, selector) => {
        const doc = element.ownerDocument || document;
        const iterator = doc.evaluate(
            selector,
            element,
            null,
            XPathResult.ORDERED_NODE_ITERATOR_TYPE
        );
        const array = [];
        let item;
        while ((item = iterator.iterateNext())) {
            array.push(item);
        }
        return array;
    }",
});
156+
/// <summary>
/// Initializes the manager and builds the table of built-in query handlers
/// ("aria", "pierce", "text" and "xpath").
/// </summary>
public CustomQueriesManager()
{
    // Selector prefixes are matched case-insensitively when a handler is looked up,
    // so the reserved names must be compared the same way. Otherwise a custom
    // handler such as "XPath" could be registered yet never be reached, because
    // the built-in "xpath" entry would always win the prefix match.
    _internalQueryHandlers = new(StringComparer.OrdinalIgnoreCase)
    {
        ["aria"] = _ariaHandler,
        ["pierce"] = _pierceHandler,
        ["text"] = _textQueryHandler,
        ["xpath"] = _xpathHandler,
    };
}
137167
138168 internal void RegisterCustomQueryHandler ( string name , CustomQueryHandler queryHandler )
139169 {
170+ if ( _internalQueryHandlers . ContainsKey ( name ) )
171+ {
172+ throw new PuppeteerException ( $ "A query handler named \" { name } \" already exists") ;
173+ }
174+
140175 if ( _queryHandlers . ContainsKey ( name ) )
141176 {
142177 throw new PuppeteerException ( $ "A custom query handler named \" { name } \" already exists") ;
@@ -155,21 +190,23 @@ internal void RegisterCustomQueryHandler(string name, CustomQueryHandler queryHa
155190
/// <summary>
/// Picks the query handler for <paramref name="selector"/> based on its prefix.
/// </summary>
/// <param name="selector">Selector, optionally prefixed with a handler name followed by "=" or "/".</param>
/// <returns>
/// The selector with the matched prefix removed together with the matching handler,
/// or the unchanged selector and the default handler when no prefix matches.
/// </returns>
internal (string UpdatedSelector, PuppeteerQueryHandler QueryHandler) GetQueryHandlerAndSelector(string selector)
{
    // Built-in handlers are probed before the user-registered ones.
    foreach (var entry in _internalQueryHandlers.Concat(_queryHandlers))
    {
        // First separator (in declaration order) that completes a "<name><separator>" prefix, ignoring case.
        var matchedSeparator = Array.Find(
            CustomQuerySeparators,
            candidate => selector.StartsWith(entry.Key + candidate, StringComparison.OrdinalIgnoreCase));

        if (matchedSeparator is not null)
        {
            var prefixLength = entry.Key.Length + matchedSeparator.Length;
            return (selector.Substring(prefixLength), entry.Value);
        }
    }

    return (selector, _defaultHandler);
}
174211
175212 internal IEnumerable < string > GetCustomQueryHandlerNames ( )
@@ -267,7 +304,6 @@ await handle.ExecutionContext.World.GetPuppeteerUtilAsync().ConfigureAwait(false
267304 return internalHandler ;
268305 }
269306
/// <summary>
/// Returns the names currently present in the custom handler table, copied into an array.
/// </summary>
private IEnumerable<string> CustomQueryHandlerNames()
{
    var registeredNames = _queryHandlers.Keys.ToArray();
    return registeredNames;
}
272308 }
273309}
0 commit comments