@@ -70,13 +70,13 @@ <h1>Source code for sequential.seq2pat</h1><div class="highlight"><pre>
7070< span class ="c1 "> # SPDX-License-Identifier: GPL-2.0</ span >
7171
7272< span class ="kn "> import</ span > < span class ="nn "> gc</ span >
73- < span class ="kn "> from</ span > < span class ="nn "> typing</ span > < span class ="kn "> import</ span > < span class ="n "> NamedTuple</ span > < span class ="p "> ,</ span > < span class ="n "> List</ span > < span class ="p "> ,</ span > < span class ="n "> Dict</ span > < span class ="p "> ,</ span > < span class ="n "> NoReturn</ span >
73+ < span class ="kn "> from</ span > < span class ="nn "> typing</ span > < span class ="kn "> import</ span > < span class ="n "> NamedTuple</ span > < span class ="p "> ,</ span > < span class ="n "> List</ span > < span class ="p "> ,</ span > < span class ="n "> Dict</ span > < span class ="p "> ,</ span > < span class ="n "> NoReturn</ span > < span class =" p " > , </ span > < span class =" n " > Optional </ span >
7474
7575< span class ="kn "> from</ span > < span class ="nn "> sequential.backend</ span > < span class ="kn "> import</ span > < span class ="n "> seq_to_pat</ span > < span class ="k "> as</ span > < span class ="n "> stp</ span >
7676< span class ="kn "> from</ span > < span class ="nn "> sequential.utils</ span > < span class ="kn "> import</ span > < span class ="n "> Num</ span > < span class ="p "> ,</ span > < span class ="n "> check_true</ span > < span class ="p "> ,</ span > < span class ="n "> get_max_column_size</ span > < span class ="p "> ,</ span > \
7777 < span class ="n "> get_min_value</ span > < span class ="p "> ,</ span > < span class ="n "> get_max_value</ span > < span class ="p "> ,</ span > < span class ="n "> sort_pattern</ span > < span class ="p "> ,</ span > < span class ="n "> item_map</ span > < span class ="p "> ,</ span > \
7878 < span class ="n "> string_to_int</ span > < span class ="p "> ,</ span > < span class ="n "> int_to_string</ span > < span class ="p "> ,</ span > < span class ="n "> check_sequence_feature_same_length</ span > < span class ="p "> ,</ span > \
79- < span class ="n "> validate_attribute_values</ span > < span class ="p "> ,</ span > < span class ="n "> validate_sequences</ span >
79+ < span class ="n "> validate_attribute_values</ span > < span class ="p "> ,</ span > < span class ="n "> validate_sequences</ span > < span class =" p " > , </ span > < span class =" n " > validate_max_span </ span >
8080
8181
8282< span class ="c1 "> # IMPORTANT: Constant values should not be changed</ span >
@@ -345,11 +345,18 @@ <h1>Source code for sequential.seq2pat</h1><div class="highlight"><pre>
345345< span class ="sd "> sequences : List[list]</ span >
346346< span class ="sd "> A list of sequences each with a list of events.</ span >
347347< span class ="sd "> The event values can be all strings or all integers.</ span >
348+ < span class ="sd "> max_span: Optional[int]</ span >
349+ < span class ="sd "> The value for applying a built-in maximum span constraint to the length of items in mining, max_span=10 by</ span >
350+ < span class ="sd "> default (10 items). This prevents regular users from running into a scaling issue when data contains long</ span >
351+ < span class ="sd "> sequences but no constraints are used to run the mining efficiently and practically.</ span >
352+ < span class ="sd "> Power users can choose to drop this constraint by setting it to None, or increase the maximum span</ span >
353+ < span class ="sd "> as far as the system's resources can support.</ span >
348354< span class ="sd "> """</ span >
349355
350- < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> sequences</ span > < span class ="p "> :</ span > < span class ="n "> List</ span > < span class ="p "> [</ span > < span class ="nb "> list</ span > < span class ="p "> ]):</ span >
351- < span class ="c1 "> # Validate input sequences </ span >
356+ < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> sequences</ span > < span class ="p "> :</ span > < span class ="n "> List</ span > < span class ="p "> [</ span > < span class ="nb "> list</ span > < span class ="p "> ], </ span > < span class =" n " > max_span </ span > < span class =" p " > : </ span > < span class =" n " > Optional </ span > < span class =" p " > [ </ span > < span class =" nb " > int </ span > < span class =" p " > ] </ span > < span class =" o " > = </ span > < span class =" mi " > 10 </ span > < span class =" p " > ):</ span >
357+ < span class ="c1 "> # Validate input</ span >
352358 < span class ="n "> validate_sequences</ span > < span class ="p "> (</ span > < span class ="n "> sequences</ span > < span class ="p "> )</ span >
359+ < span class ="n "> validate_max_span</ span > < span class ="p "> (</ span > < span class ="n "> max_span</ span > < span class ="p "> )</ span >
353360
354361 < span class ="c1 "> # Input sequences</ span >
355362 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _sequences</ span > < span class ="p "> :</ span > < span class ="n "> List</ span > < span class ="p "> [</ span > < span class ="nb "> list</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="n "> sequences</ span >
@@ -373,6 +380,15 @@ <h1>Source code for sequential.seq2pat</h1><div class="highlight"><pre>
373380 < span class ="c1 "> # Cython implementor object</ span >
374381 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _cython_imp</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span >
375382
383+ < span class ="k "> if</ span > < span class ="n "> max_span</ span > < span class ="p "> :</ span >
384+ < span class ="c1 "> # Create index attribute</ span >
385+ < span class ="n "> index_attr</ span > < span class ="o "> =</ span > < span class ="n "> Attribute</ span > < span class ="p "> ([[</ span > < span class ="n "> i</ span > < span class ="k "> for</ span > < span class ="n "> i</ span > < span class ="ow "> in</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> seq</ span > < span class ="p "> ))]</ span > < span class ="k "> for</ span > < span class ="n "> seq</ span > < span class ="ow "> in</ span > < span class ="n "> sequences</ span > < span class ="p "> ])</ span >
386+
387+ < span class ="c1 "> # Add built-in maximum span constraint on index.</ span >
388+ < span class ="c1 "> # The minimum span is at least 1 between two indices. Here we add it explicitly.</ span >
389+ < span class ="c1 "> # Given max_span items, the maximum difference on the index is (max_span - 1)</ span >
390+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> add_constraint</ span > < span class ="p "> (</ span > < span class ="mi "> 1</ span > < span class ="o "> <=</ span > < span class ="n "> index_attr</ span > < span class ="o "> .</ span > < span class ="n "> span</ span > < span class ="p "> ()</ span > < span class ="o "> <=</ span > < span class ="p "> (</ span > < span class ="n "> max_span</ span > < span class ="o "> -</ span > < span class ="mi "> 1</ span > < span class ="p "> ))</ span >
391+
376392 < span class ="nd "> @property</ span >
377393 < span class ="k "> def</ span > < span class ="nf "> sequences</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> )</ span > < span class ="o "> -></ span > < span class ="n "> List</ span > < span class ="p "> [</ span > < span class ="nb "> list</ span > < span class ="p "> ]:</ span >
378394 < span class ="sd "> """Sequence</ span >
0 commit comments