File tree: 3 files changed, +4 −20 lines changed

Original file line number Diff line number Diff line change @@ -1106,15 +1106,14 @@ def is_extra_words_position_valid(match):
11061106 # Count of extra phrase markers
11071107 extra_phrase_count = 0
11081108
1109- for span , allowed_extra_words in extra_phrase_spans :
1110- rule_index = span .start - extra_phrase_count - 1
1111- allowed_extra_words = allowed_extra_words
1109+ for span , allowed_extra_word in extra_phrase_spans :
1110+ rule_index = span .start
11121111
11131112 matched_index = span .start + matched_count - extra_phrase_count
11141113 extra_words_count = 0
11151114
11161115 # return false if token before `extra-words` in `matched_token` is not same as token before `extra-phrases` in `rule_tokens`
1117- if (matched_tokens [matched_index - 1 ] != rule_tokens [rule_index ]):
1116+ if (matched_tokens [matched_index - 1 ] != rule_tokens [rule_index - 1 ]):
11181117 return False
11191118
11201119 # Count how many tokens in `matched_text` do not match the next rule token
@@ -1124,7 +1123,7 @@ def is_extra_words_position_valid(match):
11241123 matched_count += 1
11251124 extra_words_count += 1
11261125
1127- if extra_words_count > allowed_extra_words :
1126+ if extra_words_count > allowed_extra_word :
11281127 return False
11291128
11301129 extra_phrase_count += 1
Original file line number Diff line number Diff line change 77# See https://aboutcode.org for more information about nexB OSS projects.
88#
99
10- import re
1110import os
1211import sys
1312import traceback
@@ -2333,9 +2332,6 @@ def tokens(self):
23332332
23342333 # identify and capture the spans of extra phrases specified within the rule
23352334 self .extra_phrase_spans = list (self .extra_phrases ())
2336-
2337- # remove extra_phrase marker from rules
2338- self .text = remove_extra_phrase (self .text )
23392335
23402336 text = self .text
23412337 # We tag this rule as being a bare URL if it starts with a scheme and is
@@ -2600,13 +2596,6 @@ def from_match_data(license_match_mapping):
26002596 return get_index ().rules_by_id [rule_identifier ]
26012597
26022598
2603- def remove_extra_phrase (text ):
2604- """
2605- Remove extra phrase markers like [[n]], where the n is a digit.
2606- """
2607- pattern = r'\[\[\d+\]\]'
2608- return re .sub (pattern , '' , text )
2609-
26102599def compute_relevance (length ):
26112600 """
26122601 Return a computed ``relevance`` given a ``length`` and a threshold.
Original file line number Diff line number Diff line change @@ -86,8 +86,6 @@ def query_lines(
8686extra_phrase_splitter = re .compile (extra_phrase_pattern , re .UNICODE ).findall
8787
8888
89- extra_phrase_removal_pattern = re .compile (r'\[\[\d+\]\]' )
90-
9189REQUIRED_PHRASE_OPEN = '{{'
9290REQUIRED_PHRASE_CLOSE = '}}'
9391
@@ -351,8 +349,6 @@ def index_tokenizer_with_stopwords(text, stopwords=STOPWORDS):
351349 """
352350 if not text :
353351 return [], {}
354-
355- text = extra_phrase_removal_pattern .sub ('' , text )
356352
357353 tokens = []
358354 tokens_append = tokens .append
You can’t perform that action at this time. 0 commit comments