Skip to content

Commit 0f7cacc

Browse files
authored
Merge pull request #1532 from JHertz5/issue-1531
Issue#1531: Improved parser to handle both qualified expressions and parenthesis character literals
2 parents 2e2f14f + 0e98e93 commit 0f7cacc

File tree

2 files changed

+68
-17
lines changed

2 files changed

+68
-17
lines changed

tests/tokens/test_token_method.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,23 @@ def test_qualified_expression(self):
650650

651651
self.assertEqual(lTokens, lActual)
652652

653+
def test_parenthesis_character_literal(self):
    """Check that `'('` assigned to a signal is returned by tokens.create as one token."""
    sLine = " my_char <= '(';"

    # Expected tokens, in order; the quoted parenthesis must stay together.
    lTokens = [" ", "my_char", " ", "<=", " ", "'('", ";"]

    lActual = tokens.create(sLine)

    self.assertEqual(lTokens, lActual)
669+
653670
def test_quotes_in_comments(self):
654671
sLine = '--! some text "other text'
655672

@@ -866,6 +883,23 @@ def test_multiple_character_literals(self):
866883

867884
self.assertEqual(lTokens, lActual)
868885

886+
def test_multiple_character_literals_with_qualified_expression(self):
    """Check tokenizing of a qualified expression followed by `|`-separated character literals."""
    sLine = "std_logic'('1')|'1'|'0'"

    # The tick and parenthesis of the qualified expression are separate tokens, while each quoted
    # digit is a single character-literal token.
    lTokens = ["std_logic", "'", "(", "'1'", ")", "|", "'1'", "|", "'0'"]

    lActual = tokens.create(sLine)

    self.assertEqual(lTokens, lActual)
902+
869903
def test_backslash(self):
870904
sLine = 'a "/\\" b'
871905
lTokens = []

vsg/tokens.py

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -224,11 +224,7 @@ def find_character_literal_candidates(lQuotes, lChars):
224224

225225
def is_character_literal_candidate(iIndex, lQuotes, lChars):
    """Report whether the quote at lQuotes[iIndex] may open a character literal.

    Parameters:
        iIndex: position within lQuotes of the quote being examined.
        lQuotes: indexable collection whose entries are used as indices into lChars.
        lChars: indexable collection of tokens; the token after the quote is inspected.

    Returns:
        True when both helper checks pass: exactly one token lies between this quote and the next
        one, and that token is a single character.
    """
    iQuote = lQuotes[iIndex]
    # A "(" between quotes is intentionally NOT rejected here: `'('` is a legal character literal.
    # The false positive produced by qualified expressions such as std_logic'('1') is discarded
    # later by filter_character_literal_candidates.
    return (
        there_is_a_single_token_between_quotes(iIndex, lQuotes)
        and token_between_quotes_is_a_single_character(iQuote, lChars)
    )
232228

233229

234230
def there_is_a_single_token_between_quotes(iIndex, lQuotes):
@@ -239,19 +235,40 @@ def token_between_quotes_is_a_single_character(iQuote, lChars):
239235
return len(lChars[iQuote + 1]) == 1
240236

241237

242-
def token_is_not_a_parenthesis(iQuote, lChars):
    """Return True unless the token immediately after index iQuote in lChars is "("."""
    sTokenAfterQuote = lChars[iQuote + 1]
    return sTokenAfterQuote != "("
244-
245-
246-
def filter_character_literal_candidates(lCandidates):
    """Select the real character literals from a list of candidates.

    Parameters:
        lCandidates: list of candidates in source order. Each candidate is indexed with [0] and [1];
            presumably these are the positions of the opening and closing quote (confirm against
            the caller). Two candidates are "sequential" when the [1] of one equals the [0] of the
            next, i.e. they share a quote.

    Returns:
        A new list holding the candidates that are kept, in their original order. An empty input
        yields an empty list.
    """
    lReturn = []
    lSequentialCandidates = []
    iLastIndex = len(lCandidates) - 1

    for iIndex, lCandidate in enumerate(lCandidates):
        # The algorithm has to cope with sequences of character literals separated by a single
        # character, e.g. `'1','0','a'`, and with character literals inside qualified expressions,
        # e.g. std_logic'('1'). Both produce "red herring" candidates. Candidates are therefore
        # grouped into runs that share quotes; most runs contain a single candidate.
        lSequentialCandidates.append(lCandidate)

        # A run ends at the final candidate, or when the next candidate does not begin on the
        # quote that ends this one. The `or` short-circuits, so the look-ahead never overruns.
        bCandidateIsLastInSequence = iIndex == iLastIndex or lCandidate[1] != lCandidates[iIndex + 1][0]
        if not bCandidateIsLastInSequence:
            continue

        # Within a run, valid and invalid candidates alternate. In `'1','0'` the candidates are
        # '1' (valid), ',' (invalid), '0' (valid): an odd count whose first entry is valid. A
        # qualified expression such as std_logic'('1') contributes a leading invalid '(' candidate,
        # making the count even. So keep every second candidate, starting at 0 for an odd-length
        # run and at 1 for an even-length run.
        iSequenceStart = (len(lSequentialCandidates) + 1) % 2
        lReturn.extend(lSequentialCandidates[iSequenceStart::2])

        # Start collecting the next run.
        lSequentialCandidates = []

    return lReturn
256273

257274

0 commit comments

Comments
 (0)