Skip to content

Commit 2e2f14f

Browse files
authored
Merge pull request #1538 from JHertz5/issue-1537
Issue#1537: Performance improvements
2 parents 24bcf8a + 4c31f91 commit 2e2f14f

File tree

5 files changed

+137
-208
lines changed

5 files changed

+137
-208
lines changed

vsg/rule_list.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,10 @@ def load_rules():
7272
Returns: (rule object list)
7373
"""
7474
lRules = []
75-
for name, oPackage in inspect.getmembers(importlib.import_module("vsg.rules")):
76-
if inspect.ismodule(oPackage):
77-
for name, oRule in inspect.getmembers(oPackage):
78-
if inspect.isclass(oRule) and name.startswith("rule_"):
79-
lRules.append(oRule())
80-
75+
for _, oPackage in inspect.getmembers(importlib.import_module("vsg.rules"), inspect.ismodule):
76+
for rule_name, oRule in inspect.getmembers(oPackage, inspect.isclass):
77+
if rule_name.startswith("rule_"):
78+
lRules.append(oRule())
8179
return lRules
8280

8381

vsg/token_map.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ def get_token_indexes(self, oToken, bCopy=False):
2222
return []
2323

2424
def get_token_indexes_between_indexes(self, oToken, iStart, iEnd):
25-
lReturn = []
2625
lIndexes = self.get_token_indexes(oToken)
27-
for iIndex in lIndexes:
28-
if iIndex > iStart and iIndex < iEnd:
29-
lReturn.append(iIndex)
30-
return lReturn
26+
if not lIndexes:
27+
return []
28+
# Use a binary search to get the first index > the start.
29+
iLowIndex = bisect.bisect_right(lIndexes, iStart)
30+
# Use a binary search to get the position just past the last index < the end (the exclusive slice bound).
31+
iHighIndex = bisect.bisect_left(lIndexes, iEnd)
32+
return lIndexes[iLowIndex:iHighIndex]
3133

3234
def get_line_number_of_index(self, iIndex):
3335
iLine = bisect.bisect_left(self.dMap["parser"]["carriage_return"], iIndex) + 1
@@ -246,12 +248,13 @@ def extract_pairs(lStartIndexes, lEndIndexes):
246248

247249

248250
def extract_closest_pair(iStart, lEndIndexes, lPair, iMin):
249-
for iEnd in lEndIndexes:
250-
if iStart > iEnd:
251-
continue
252-
if iEnd - iStart < iMin:
253-
lPair = [iStart, iEnd]
254-
iMin = iEnd - iStart
251+
# The list of end indexes is assumed to be sorted. Get the first end >= start via binary search.
252+
iPos = bisect.bisect_left(lEndIndexes, iStart)
253+
if iPos == len(lEndIndexes):
254+
return lPair
255+
iEnd = lEndIndexes[iPos]
256+
if iEnd - iStart < iMin:
257+
lPair = [iStart, iEnd]
255258
return lPair
256259

257260

vsg/tokens.py

Lines changed: 62 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,26 @@
11
# -*- coding: utf-8 -*-
22

3-
lSingleCharacterSymbols = [",", ":", "(", ")", "'", '"', "+", "&", "-", "*", "/", "<", ">", ";", "=", "[", "]", "?"]
4-
lTwoCharacterSymbols = ["=>", "**", ":=", "/=", ">=", "<=", "<>", "??", "?=", "?<", "?>", "<<", ">>", "--", "/*", "*/"]
5-
lThreeCharacterSymbols = ["?/=", "?<=", "?>="]
6-
lFourCharacterSymbols = ["\\?=\\"]
3+
lSingleCharacterSymbols = {",", ":", "(", ")", "'", '"', "+", "&", "-", "*", "/", "<", ">", ";", "=", "[", "]", "?"}
4+
lTwoCharacterSymbols = {"=>", "**", ":=", "/=", ">=", "<=", "<>", "??", "?=", "?<", "?>", "<<", ">>", "--", "/*", "*/"}
5+
lThreeCharacterSymbols = {"?/=", "?<=", "?>="}
6+
lMultiCharacterSymbols = lTwoCharacterSymbols | lThreeCharacterSymbols
77

8-
lStopChars = [" ", "(", ";"]
8+
lStopChars = {" ", "(", ";"}
9+
10+
11+
def build_symbol_prefix_tree(lSymbols):
    """
    Builds a prefix tree of nested dictionaries from a collection of symbol strings.

    Each character of a symbol selects a child dictionary one level deeper.
    The dictionary reached after the final character of a symbol stores the
    complete symbol under the key "$".

    Parameters:

      lSymbols: (iterable of strings)

    Returns: (dictionary)
    """
    dRoot = {}
    for sSymbol in lSymbols:
        # Every symbol is inserted starting from the top of the tree.
        dBranch = dRoot
        for sChar in sSymbol:
            # Step into the child for this character, creating it on first use.
            dBranch = dBranch.setdefault(sChar, {})
        # "$" marks that a complete symbol ends at this depth.
        dBranch["$"] = sSymbol
    return dRoot
21+
22+
23+
dSymbolTree = build_symbol_prefix_tree(lMultiCharacterSymbols)
924

1025

1126
def create(sString):
@@ -17,8 +32,7 @@ def create(sString):
1732
oLine.combine_whitespace()
1833
oLine.combine_string_literals()
1934
oLine.combine_backslash_characters_into_symbols()
20-
oLine.combine_three_character_symbols()
21-
oLine.combine_two_character_symbols()
35+
oLine.combine_symbols_with_prefix_tree()
2236
oLine.combine_characters_into_words()
2337
oLine.combine_character_literals()
2438
oLine.split_natural_numbers()
@@ -28,7 +42,7 @@ def create(sString):
2842

2943
class New:
3044
def __init__(self, sLine):
31-
self.lChars = convert_string_to_chars(sLine)
45+
self.lChars = list(sLine)
3246

3347
def combine_whitespace(self):
3448
lReturn = []
@@ -46,6 +60,30 @@ def combine_whitespace(self):
4660

4761
self.lChars = lReturn
4862

63+
def combine_symbols_with_prefix_tree(self):
    """
    Merges consecutive entries of self.lChars that spell a multi-character symbol.

    self.lChars is scanned from left to right. At each position the
    module-level prefix tree dSymbolTree is descended one entry at a time
    while remembering the longest complete symbol seen so far. If a symbol
    was found, it replaces the entries it spans; otherwise the single entry
    at the current position is copied through unchanged.

    The combined list is stored back into self.lChars. Nothing is returned.
    """
    lReturn = []
    iStart = 0
    iNumChars = len(self.lChars)
    while iStart < iNumChars:
        dNode = dSymbolTree
        iEnd = iStart
        oLastMatch = None
        iPrevEnd = iStart
        # Try to match as long a symbol as possible.
        while iEnd < iNumChars:
            sChar = self.lChars[iEnd]
            # "$" is the tree's end-of-symbol marker key, not a branch.
            # Descending on a literal "$" from the input would rebind dNode
            # to the stored symbol string and break the following lookups.
            if sChar == "$" or sChar not in dNode:
                break
            dNode = dNode[sChar]
            iEnd += 1
            if "$" in dNode:
                # A complete symbol ends here; keep looking for a longer one.
                oLastMatch = dNode["$"]
                iPrevEnd = iEnd
        if oLastMatch is not None:
            lReturn.append(oLastMatch)
            # Resume after the matched symbol, not after the furthest lookahead.
            iStart = iPrevEnd
        else:
            lReturn.append(self.lChars[iStart])
            iStart += 1
    self.lChars = lReturn
86+
4987
def combine_backslash_characters_into_symbols(self):
5088
lReturn = []
5189
sSymbol = ""
@@ -61,48 +99,21 @@ def combine_backslash_characters_into_symbols(self):
6199
lReturn = add_trailing_string(lReturn, sSymbol)
62100
self.lChars = lReturn
63101

64-
def combine_three_character_symbols(self):
65-
lReturn = []
66-
i = 0
67-
while i < len(self.lChars):
68-
sChars = "".join(self.lChars[i : i + 3])
69-
if sChars in lThreeCharacterSymbols:
70-
lReturn.append(sChars)
71-
i += 3
72-
else:
73-
lReturn.append(self.lChars[i])
74-
i += 1
75-
76-
self.lChars = lReturn
77-
78-
def combine_two_character_symbols(self):
79-
lReturn = []
80-
i = 0
81-
while i < len(self.lChars):
82-
sChars = "".join(self.lChars[i : i + 2])
83-
if sChars in lTwoCharacterSymbols:
84-
lReturn.append(sChars)
85-
i += 2
86-
else:
87-
lReturn.append(self.lChars[i])
88-
i += 1
89-
90-
self.lChars = lReturn
91-
92102
def combine_characters_into_words(self):
93103
lReturn = []
94-
sTemp = ""
104+
sWord = []
105+
95106
for sChar in self.lChars:
96107
if character_is_part_of_word(sChar):
97-
sTemp += sChar
108+
sWord.append(sChar)
98109
else:
99-
if sTemp != "":
100-
lReturn.append(sTemp)
110+
if sWord:
111+
lReturn.append("".join(sWord))
112+
sWord.clear()
101113
lReturn.append(sChar)
102-
sTemp = ""
103114

104-
if len(sTemp) != 0:
105-
lReturn.append(sTemp)
115+
if sWord:
116+
lReturn.append("".join(sWord))
106117

107118
self.lChars = lReturn
108119

@@ -213,31 +224,23 @@ def find_character_literal_candidates(lQuotes, lChars):
213224

214225
def is_character_literal_candidate(iIndex, lQuotes, lChars):
215226
iQuote = lQuotes[iIndex]
216-
if (
227+
return (
217228
there_is_a_single_token_between_quotes(iIndex, lQuotes)
218229
and token_between_quotes_is_a_single_character(iQuote, lChars)
219230
and token_is_not_a_parenthesis(iQuote, lChars)
220-
):
221-
return True
222-
return False
231+
)
223232

224233

225234
def there_is_a_single_token_between_quotes(iIndex, lQuotes):
226-
if lQuotes[iIndex] + 2 == lQuotes[iIndex + 1]:
227-
return True
228-
return False
235+
return lQuotes[iIndex] + 2 == lQuotes[iIndex + 1]
229236

230237

231238
def token_between_quotes_is_a_single_character(iQuote, lChars):
232-
if len(lChars[iQuote + 1]) == 1:
233-
return True
234-
return False
239+
return len(lChars[iQuote + 1]) == 1
235240

236241

237242
def token_is_not_a_parenthesis(iQuote, lChars):
238-
if lChars[iQuote + 1] == "(":
239-
return False
240-
return True
243+
return lChars[iQuote + 1] != "("
241244

242245

243246
def filter_character_literal_candidates(lLiterals):
@@ -272,15 +275,11 @@ def append_to_list(bSymbol, lChars, sChar):
272275

273276

274277
def backslash_character_found(sChar):
275-
if sChar == "\\":
276-
return True
277-
return False
278+
return sChar == "\\"
278279

279280

280281
def stop_character_found(sChar, bLiteral):
281-
if (sChar in lStopChars or " " in sChar) and bLiteral:
282-
return True
283-
return False
282+
return (sChar in lStopChars or " " in sChar) and bLiteral
284283

285284

286285
def add_trailing_string(lReturn, sString):
@@ -289,21 +288,8 @@ def add_trailing_string(lReturn, sString):
289288
return lReturn
290289

291290

292-
def convert_string_to_chars(sString):
293-
lReturn = []
294-
for sChar in sString:
295-
lReturn.append(sChar)
296-
return lReturn
297-
298-
299291
def character_is_part_of_word(sChar):
300-
if len(sChar) > 1:
301-
return False
302-
elif sChar.isspace():
303-
return False
304-
elif sChar in lSingleCharacterSymbols:
305-
return False
306-
return True
292+
return len(sChar) == 1 and not sChar.isspace() and sChar not in lSingleCharacterSymbols
307293

308294

309295
def find_indexes_of_double_quote_pairs(lTokens):

0 commit comments

Comments
 (0)