33"""
44
55import sys
6+ from importlib import import_module
7+ from warnings import warn
68
79import six
8- from lxml import etree , html
10+ from lxml import etree
911
1012from .utils import flatten , iflatten , extract_regex , shorten
1113from .csstranslator import HTMLTranslator , GenericTranslator
1214
1315
def _load_object(path):
    """Load an object given its absolute object path, and return it.

    `path` can point to a class, function, variable or a class instance. For
    example: ``'parsel.parser.html.HTML_PARSER'``.

    The text before the last dot is imported as a module and the text after
    it is looked up as an attribute of that module.

    Raises ``ValueError`` when `path` has no dot or has an empty part on
    either side of the last dot, and ``NameError`` when the imported module
    does not define the requested attribute. Errors raised by
    ``import_module`` itself propagate unchanged.
    """
    module, dot, name = path.rpartition('.')
    # Reject 'name', 'module.' and '.name' up front: none of them is a full
    # "module.attribute" path, and letting them through only produces
    # confusing errors further down.
    if not (dot and module and name):
        raise ValueError("Error loading object '%s': not a full path" % path)

    mod = import_module(module)

    # A sentinel default (instead of try/except AttributeError) keeps the
    # NameError below from being reported as raised while handling another
    # exception.
    missing = object()
    obj = getattr(mod, name, missing)
    if obj is missing:
        raise NameError("Module '%s' doesn't define any object named '%s'" % (module, name))

    return obj
37+
38+
class CannotRemoveElementWithoutRoot(Exception):
    """Error type for removing an element that has no root.

    NOTE(review): the code that raises this is outside the visible part of
    the file -- confirm the exact condition there.
    """
1641
@@ -21,14 +46,16 @@ class CannotRemoveElementWithoutParent(Exception):
2146
class SafeXMLParser(etree.XMLParser):
    """Deprecated ``etree.XMLParser`` subclass with entity resolution off by default.

    Instantiating it emits a ``DeprecationWarning`` attributed to the caller
    (``stacklevel=2``).
    """

    def __init__(self, *args, **kwargs):
        warn('parsel.selector.SafeXMLParser is deprecated',
             DeprecationWarning, stacklevel=2)
        # setdefault, not assignment: an explicit resolve_entities=... passed
        # by the caller is kept as-is.
        kwargs.setdefault('resolve_entities', False)
        super(SafeXMLParser, self).__init__(*args, **kwargs)
2653
# Per-selector-type configuration. '_parser' holds the dotted path of the
# parser object; Selector.__init__ resolves it with _load_object().
_ctgroup = {
    'html': {'_parser': 'parsel.parser.html.HTML_PARSER',
             '_csstranslator': HTMLTranslator(),
             '_tostring_method': 'html'},
    'xml': {'_parser': 'parsel.parser.xml.XML_PARSER',
            '_csstranslator': GenericTranslator(),
            '_tostring_method': 'xml'},
}
@@ -46,6 +73,8 @@ def _st(st):
4673def create_root_node (text , parser_cls , base_url = None ):
4774 """Create root node for text using given parser class.
4875 """
76+ warn ('parsel.selector.create_root_node is deprecated' ,
77+ DeprecationWarning , stacklevel = 2 )
4978 body = text .strip ().replace ('\x00 ' , '' ).encode ('utf8' ) or b'<html/>'
5079 parser = parser_cls (recover = True , encoding = 'utf8' )
5180 root = etree .fromstring (body , parser = parser , base_url = base_url )
@@ -198,7 +227,7 @@ class Selector(object):
198227 def __init__ (self , text = None , type = None , namespaces = None , root = None ,
199228 base_url = None , _expr = None ):
200229 self .type = st = _st (type or self ._default_type )
201- self ._parser = _ctgroup [st ]['_parser' ]
230+ self ._parser = _load_object ( _ctgroup [st ]['_parser' ])
202231 self ._csstranslator = _ctgroup [st ]['_csstranslator' ]
203232 self ._tostring_method = _ctgroup [st ]['_tostring_method' ]
204233
@@ -219,7 +248,7 @@ def __getstate__(self):
219248 raise TypeError ("can't pickle Selector objects" )
220249
def _get_root(self, text, base_url=None):
    """Return the result of parsing `text` with this selector's parser.

    `text` and `base_url` are forwarded as keyword arguments to
    ``self._parser.parse``; whatever that call returns is returned as-is.
    """
    return self._parser.parse(text=text, base_url=base_url)
223252
224253 def xpath (self , query , namespaces = None , ** kwargs ):
225254 """
0 commit comments