1313# See the License for the specific language governing permissions and
1414# limitations under the License.
1515#
16- from datetime import datetime
16+ from datetime import datetime , timedelta
1717from dateutil .relativedelta import relativedelta
18+
1819from lingua_franca .time import now_local
19- from .parse_common import is_numeric , look_for_fractions , Normalizer
20+
21+ from .parse_common import (is_numeric , look_for_fractions , Normalizer ,
22+ tokenize , Token )
23+
24+
25+ def _find_numbers_in_text (tokens ):
26+ """Finds duration related numbers in texts and makes a list of mappings.
27+
28+ The mapping will be for number to token that created it, if no number was
29+ created from the token the mapping will be from None to the token.
30+
31+ The function is optimized to generate data that can be parsed to a duration
32+ so it returns the list in reverse order to make the "size" (minutes/hours/
33+ etc.) come first and the related numbers afterwards.
34+
35+ Args:
36+ tokens: Tokens to parse
37+
38+ Returns:
39+ list of (number, token) tuples
40+ """
41+ parts = []
42+ for tok in tokens :
43+ res = extract_number_sv (tok .word )
44+ if res :
45+ parts .insert (0 , (res , tok ))
46+ # Special case for quarter of an hour
47+ if tok .word == 'kvart' :
48+ parts .insert (0 , (None , Token ('timmar' , index = - 1 )))
49+ elif tok .word in ['halvtimme' , 'halvtimma' ]:
50+ parts .insert (0 , (30 , tok ))
51+ parts .insert (0 , (None , Token ('minuter' , index = - 1 )))
52+ else :
53+ parts .insert (0 , (None , tok ))
54+ return parts
55+
56+
57+ def _combine_adjacent_numbers (number_map ):
58+ """Combine adjacent numbers through multiplication.
59+
60+ Walks through a number map and joins adjasent numbers to handle cases
61+ such as "en halvtimme" (one half hour).
62+
63+ Returns:
64+ (list): simplified number_map
65+ """
66+ simplified = []
67+ skip = False
68+ for i in range (len (number_map ) - 1 ):
69+ if skip :
70+ skip = False
71+ continue
72+ if number_map [i ][0 ] and number_map [i + 1 ][0 ]:
73+ combined_number = number_map [i ][0 ] * number_map [i + 1 ][0 ]
74+ combined_tokens = (number_map [i ][1 ], number_map [i + 1 ][1 ])
75+ simplified .append ((combined_number , combined_tokens ))
76+ skip = True
77+ else :
78+ simplified .append ((number_map [i ][0 ], (number_map [i ][1 ],)))
79+
80+ if not skip :
81+ simplified .append ((number_map [- 1 ][0 ], (number_map [- 1 ][1 ],)))
82+ return simplified
83+
84+
def extract_duration_sv(text):
    """
    Convert a Swedish phrase into a duration.

    The function handles durations from seconds up to days, using the unit
    words for days ("dygn", "dag", ...), hours ("timmar", "timme", ...),
    minutes ("minuter", "minut", ...) and seconds ("sekunder", "sekund",
    ...).

    The words used in the duration will be consumed, and
    the remainder returned.

    Args:
        text (str): string containing a duration

    Returns:
        (timedelta, str) or None:
                    A tuple containing the duration and the remaining text
                    not consumed in the parsing, joined with single spaces.
                    None is returned (instead of a tuple) if no duration
                    is found.
    """
    tokens = tokenize(text)
    number_tok_map = _find_numbers_in_text(tokens)
    if not number_tok_map:
        # Nothing to parse; also keeps the empty list away from
        # _combine_adjacent_numbers, which indexes the last element.
        return None
    # Combine adjacent numbers
    simplified = _combine_adjacent_numbers(number_tok_map)

    states = {
        'days': 0,
        'hours': 0,
        'minutes': 0,
        'seconds': 0
    }

    # Parser state, mapping words that should set the parser to collect
    # numbers to a specific time "size"
    state_words = {
        'days': ('dygn', 'dag', 'dagar', 'dags'),
        'hours': ('timmar', 'timme', 'timma', 'timmes', 'timmas'),
        'minutes': ('minuter', 'minuters', 'minut', 'minuts'),
        'seconds': ('sekunder', 'sekunders', 'sekund', 'sekunds')
    }
    unit_for_word = {word: unit
                     for unit, words in state_words.items()
                     for word in words}
    # Must be a real tuple: with a bare string the membership test below
    # becomes a substring test and words like "o" or "ch" would match.
    binding_words = ('och',)

    consumed = []
    state = None
    valid = False

    # The map is in reverse text order, so a unit word is seen before the
    # number(s) that belong to it.
    for num, toks in simplified:
        if state and num:
            states[state] += num
            consumed.extend(toks)
            valid = True  # If a state field got set this is valid duration
        elif num is None:
            word = toks[0].word
            unit = unit_for_word.get(word)
            if unit is not None:
                state = unit
                consumed.extend(toks)
            elif word not in binding_words:
                # Any other word than a binding word ends the current unit
                state = None

    if not valid:
        return None

    td = timedelta(**states)
    remainder = ' '.join([t.word for t in tokens if t not in consumed])
    return (td, remainder)
20157
21158
22159def extract_number_sv (text , short_scale = True , ordinals = False ):
@@ -29,8 +166,8 @@ def extract_number_sv(text, short_scale=True, ordinals=False):
29166 (int) or (float): The value of extracted number
30167 """
31168 # TODO: short_scale and ordinals don't do anything here.
32- # The parameters are present in the function signature for API compatibility
33- # reasons.
169+ # The parameters are present in the function signature for API
170+ # compatibility reasons.
34171 text = text .lower ()
35172 aWords = text .split ()
36173 and_pass = False
0 commit comments