77
88from ordered_set import OrderedSet
99
10+ from core .mixins import PrettyReprMixin
1011from pyttman .core .exceptions import InvalidPyttmanObjectException
1112from pyttman .core .containers import MessageMixin
1213from pyttman .core .entity_parsing .entity import Entity
1314from pyttman .core .entity_parsing .identifiers import Identifier
1415
1516
16- class Parser (ABC ):
17- """
18- Base class for the Parser API in Pyttman.
19- The various entity_fields in Pyttman inherit from this
20- base class.
21-
22- field identifier:
23- An optional Identifier class can be supplied as an Identifier.
24- The Identifier's job is finding strings in a message which
25- matches its own patterns.
26- """
27- identifier : Identifier = None
28- exclude : Tuple = ()
29- prefixes : Tuple = ()
30- suffixes : Tuple = ()
31- case_preserved_cache = set ()
32-
33- def __init__ (self , ** kwargs ):
34- if hasattr (self , "value" ):
35- raise AttributeError (
36- "The field 'value' is reserved for internal use. "
37- "Please choose a different name for the field." )
38- self .value = None
39- for k , v in kwargs .items ():
40- setattr (self , k , v )
41-
42- def __repr__ (self ):
43- return f"{ self .__class__ .__name__ } (" \
44- f"exclude={ self .exclude } , " \
45- f"identifier={ self .identifier } , " \
46- f"value={ self .value } )"
47-
48- def reset (self ) -> None :
49- """
50- Resets the parser, defaulting it's value to None.
51- :return: None
52- """
53- self .value = None
54-
55- @abc .abstractmethod
56- def parse_message (self , message : MessageMixin ,
57- memoization : dict = None ) -> None :
58- """
59- Subclasses override this method, defining the
60- logic for parsing the message contents and
61- identifying the value of interest in each
62- field in the EntityParser class in which
63- these classes are created in as fields.
64- """
65- pass
66-
67-
68- class EntityFieldValueParser (Parser ):
17+ class EntityFieldValueParser (PrettyReprMixin ):
6918 """
7019 This class is used by EntityField classes primarily,
7120 as the inner-working engine for identifying and finding
7221 values which match the pattern provided in the declarative
7322 EntityParser Api component: 'EntityField'.
7423 """
75- truncates_message_in_parsing = True
76- default = None
24+ __repr_fields__ = ("identifier" , "exclude" , "prefixes" , "suffixes" )
7725
7826 def __init__ (self ,
7927 prefixes : tuple | typing .Callable = None ,
8028 suffixes : tuple | typing .Callable = None ,
8129 valid_strings : tuple | typing .Callable = None ,
8230 default : typing .Any | typing .Callable = None ,
8331 span : int | typing .Callable = 0 ,
84- identifier : Type [Identifier ] = None ,
32+ identifier : Type [Identifier ] | None = None ,
33+ exclude : typing .Iterable [str ] = None ,
8534 ** kwargs ):
86- super ().__init__ (** kwargs )
87-
88- if prefixes is None :
89- prefixes = tuple ()
90- if suffixes is None :
91- suffixes = tuple ()
92- if valid_strings is None :
93- valid_strings = tuple ()
94-
95- self .prefixes = prefixes
96- self .suffixes = suffixes
97- self .identifier : Type [Identifier ] = identifier
98- self .span = span
99- self .valid_strings = valid_strings
10035
101- if default is not None :
102- self .default = default
36+ if hasattr (self , "value" ):
37+ raise AttributeError ("The field 'value' is reserved for internal "
38+ "use. Please choose a different name for "
39+ "the field." )
10340
41+ self .truncates_message_in_parsing = True
42+ self .value = None
43+ self .case_preserved_cache = set ()
44+ self .prefixes = prefixes or tuple ()
45+ self .suffixes = suffixes or tuple ()
46+ self .exclude = exclude or tuple ()
47+ self .valid_strings = valid_strings or tuple ()
48+ self .default = default or None
49+ self .identifier = identifier
50+ self .span = span
10451 self ._properties_for_evaluation = {
10552 "prefixes" : self .prefixes ,
10653 "suffixes" : self .suffixes ,
10754 "span" : self .span ,
10855 "default" : self .default ,
10956 "valid_strings" : self .valid_strings
11057 }
111-
112- def __repr__ (self ):
113- return f"{ self .__class__ .__name__ } (value='{ self .value } ', " \
114- f"identifier={ self .identifier } , prefixes={ self .prefixes } , " \
115- f"suffixes={ self .suffixes } , span={ self .span } )"
58+ print ("SET EXLUDE TO" , self .exclude , "ON" , self )
11659
11760 def _prepare_params (self ):
11861 """
@@ -156,6 +99,13 @@ def _prepare_params(self):
15699 self .valid_strings = tuple (
157100 [i .casefold () for i in self .valid_strings ])
158101
    def reset(self) -> None:
        """
        Resets the parser, defaulting its value to None.

        Only the `value` attribute is touched; the configured
        rules on the instance are left as they are.
        :return: None
        """
        self.value = None
159109 def parse_message (self , message : MessageMixin ,
160110 memoization : dict = None ) -> None :
161111 """
@@ -214,7 +164,7 @@ def _identify_value(self, message: MessageMixin,
214164 ... these conditions need to be evaluated for each scenario.
215165
216166 Pre- and Suffix tuples can contain strings or Parsers, or a
217- combination. When Parser instances are used in these tuples,
167+ combination. When EntityFieldValueParser instances are used in these tuples,
218168 their last occurring string, separated by spaces, is chosen
219169 as the ultimate prefix. This is so due to the fact that Parsers
220170 may span across multiple elements in the string collection
@@ -249,7 +199,7 @@ def _identify_value(self, message: MessageMixin,
249199 for rule , rule_collection in {i_prefix : prefixes ,
250200 i_suffix : suffixes }.items ():
251201 if rule is not None :
252- if isinstance (rule , Parser ) and rule .value is not None :
202+ if isinstance (rule , EntityFieldValueParser ) and rule .value is not None :
253203 entity : Entity = rule .value
254204 rule_collection .append (entity .value
255205 .split ().pop ().lower ().strip ())
@@ -381,6 +331,7 @@ def _identify_value(self, message: MessageMixin,
381331 break
382332 else :
383333 if span_value not in self .exclude :
334+ print (f"{ span_value } is not in { self .exclude } for { self } " )
384335 parsed_entity .value += f" { span_value } "
385336 return parsed_entity
386337
@@ -389,7 +340,7 @@ def parse_entities(message: MessageMixin,
389340 entity_fields : dict ,
390341 exclude : tuple = None ) -> dict :
391342 """
392- Traverse over all fields which are Parser subclasses.
343+ Traverse over all fields which are EntityFieldValueParser subclasses.
393344 Have them identify their values according to their
394345 constraints and conditions, and store them in a
395346 dictionary, returned at the end of parsing.
@@ -402,7 +353,7 @@ def parse_entities(message: MessageMixin,
402353 if exclude is None :
403354 exclude = tuple ()
404355
405- # The memoization dict is provided each Parser instance
356+ # The memoization dict is provided to each EntityFieldValueParser instance
406357 # in order for them to avoid catching a string, previously
407358 # caught by a predecessor in iterations.
408359 parsers_memoization : Dict [int , Entity ] = {}
@@ -415,7 +366,7 @@ def parse_entities(message: MessageMixin,
415366 entity_field_instance .prefixes + entity_field_instance .suffixes )
416367
417368 # Share the 'exclude' tuple assigned by the developer in the
418- # application code to each Parser instance
369+ # application code to each EntityFieldValueParser instance
419370 entity_field_instance .exclude = exclude
420371 entity_field_instance .parse_message (
421372 message ,
0 commit comments