@@ -28,6 +28,7 @@ def __init__(self,
2828 span : int | typing .Callable = 0 ,
2929 identifier : Type [Identifier ] | None = None ,
3030 exclude : typing .Iterable [str ] = None ,
31+ as_list : bool = False ,
3132 ** kwargs ):
3233
3334 if hasattr (self , "value" ):
@@ -45,6 +46,7 @@ def __init__(self,
4546 self .default = default if default is not None else None
4647 self .identifier = identifier
4748 self .span = span
49+ self .as_list = as_list
4850 self ._properties_for_evaluation = {
4951 "prefixes" : self .prefixes ,
5052 "suffixes" : self .suffixes ,
@@ -119,7 +121,9 @@ def parse_message(self, message: MessageMixin,
119121 common_occurrences = tuple (
120122 OrderedSet (casefolded_msg ).intersection (self .valid_strings ))
121123
122- for word in common_occurrences :
124+ for i , word in enumerate (common_occurrences ):
125+ if i > self .span and not self .as_list :
126+ break
123127 word_index = casefolded_msg .index (word )
124128 output .append (message .content [word_index ])
125129
@@ -129,6 +133,14 @@ def parse_message(self, message: MessageMixin,
129133 self .value = Entity (output .pop ())
130134 else :
131135 self .value = Entity (self .default , is_fallback_default = True )
136+
137+ if self .value :
138+ entity = self .value
139+ if isinstance (entity .value , list ):
140+ [message .content .remove (i ) for i in entity .value ]
141+ entity .index_in_message += len (entity .value )
142+ elif isinstance (entity .value , str ):
143+ message .content .remove (self .value )
132144 return
133145
134146 if self .truncates_message_in_parsing is False :
@@ -137,7 +149,6 @@ def parse_message(self, message: MessageMixin,
137149 for i , _ in enumerate (message .content ):
138150 parsed_entity : Entity = self ._identify_value (message ,
139151 start_index = i )
140-
141152 # An entity has been identified, and it's unique.
142153 if parsed_entity is not None and memoization .get (
143154 parsed_entity .index_in_message ) is None :
@@ -292,43 +303,43 @@ def _identify_value(self, message: MessageMixin,
292303 # for each span iteration as the walk in the message progresses.
293304 # If an Identifier does not comply with a string, the walk is
294305 # cancelled.
295- if parsed_entity is not None :
296- while parsed_entity . value . casefold () in self . exclude :
297- parsed_entity . index_in_message += 1
298- # Traverse the message for as long as the current found
299- # entity is in the 'exclude' tuple. If the end of message
300- # is reached, quietly break the loop.
301- try :
302- parsed_entity . value = message . content [
303- parsed_entity . index_in_message ]
304- except IndexError :
305- return None
306-
307- current_index = parsed_entity . index_in_message
306+ if parsed_entity is None :
307+ return parsed_entity
308+
309+ while parsed_entity . value . casefold () in self . exclude :
310+ parsed_entity . index_in_message += 1
311+ # Traverse the message for as long as the current found
312+ # entity is in the 'exclude' tuple. If the end of message
313+ # is reached, quietly break the loop.
314+ try :
315+ parsed_entity . value = message . content [
316+ parsed_entity . index_in_message ]
317+ except IndexError :
318+ return None
308319
309- for i in range (1 , self .span ):
310- try :
311- current_index += 1
312- if self .identifier :
313- identifier_object : Identifier = self .identifier (
314- start_index = current_index )
315- # Identifier did not find
316- span_entity = identifier_object .try_identify_entity (
317- message )
318- if span_entity is None or span_entity .index_in_message != current_index :
319- break
320- span_value = span_entity .value
321- else :
322- span_value = message .content [current_index ]
323-
324- # There are not enough elements in message.content to walk
325- # as far as the span property requests - abort.
326- except IndexError :
327- break
320+ # Now, add words for as long as `span` allows us to iterate.
321+ for i in range (1 , self .span ):
322+ parsed_entity .index_in_message += 1
323+ try :
324+ if self .identifier :
325+ identifier_object : Identifier = self .identifier (
326+ start_index = parsed_entity .index_in_message )
327+ # Identifier did not find
328+ span_entity = identifier_object .try_identify_entity (
329+ message )
330+ if span_entity is None or span_entity .index_in_message != parsed_entity .index_in_message :
331+ break
332+ span_value = span_entity .value
328333 else :
329- if span_value not in self .exclude :
330- print (f"{ span_value } is not in { self .exclude } for { self } " )
331- parsed_entity .value += f" { span_value } "
334+ span_value = message .content [parsed_entity .index_in_message ]
335+
336+ # There are not enough elements in message.content to walk
337+ # as far as the span property requests - abort.
338+ except IndexError :
339+ break
340+ else :
341+ if span_value not in self .exclude :
342+ parsed_entity .value += f" { span_value } "
332343 return parsed_entity
333344
334345
0 commit comments