Merge pull request #1532 from JHertz5/issue-1531

JHertz5 · web-flow · commit 0f7cacce29b8 · 2026-01-13T01:37:20.000Z
Issue#1531: Improved parser to handle both qualified expressions and parenthesis character literals
diff --git a/tests/tokens/test_token_method.py b/tests/tokens/test_token_method.py
@@ -650,6 +650,23 @@ def test_qualified_expression(self):
 
         self.assertEqual(lTokens, lActual)
 
+    def test_parenthesis_character_literal(self):
+        sLine = "  my_char <= '(';"
+
+        lTokens = []
+
+        lTokens.append("  ")
+        lTokens.append("my_char")
+        lTokens.append(" ")
+        lTokens.append("<=")
+        lTokens.append(" ")
+        lTokens.append("'('")
+        lTokens.append(";")
+
+        lActual = tokens.create(sLine)
+
+        self.assertEqual(lTokens, lActual)
+
     def test_quotes_in_comments(self):
         sLine = '--! some text "other text'
 
@@ -866,6 +883,23 @@ def test_multiple_character_literals(self):
 
         self.assertEqual(lTokens, lActual)
 
+    def test_multiple_character_literals_with_qualified_expression(self):
+        sLine = "std_logic'('1')|'1'|'0'"
+        lTokens = []
+        lTokens.append("std_logic")
+        lTokens.append("'")
+        lTokens.append("(")
+        lTokens.append("'1'")
+        lTokens.append(")")
+        lTokens.append("|")
+        lTokens.append("'1'")
+        lTokens.append("|")
+        lTokens.append("'0'")
+
+        lActual = tokens.create(sLine)
+
+        self.assertEqual(lTokens, lActual)
+
     def test_backslash(self):
         sLine = 'a "/\\" b'
         lTokens = []
diff --git a/vsg/tokens.py b/vsg/tokens.py
@@ -224,11 +224,7 @@ def find_character_literal_candidates(lQuotes, lChars):
 
 def is_character_literal_candidate(iIndex, lQuotes, lChars):
     iQuote = lQuotes[iIndex]
-    return (
-        there_is_a_single_token_between_quotes(iIndex, lQuotes)
-        and token_between_quotes_is_a_single_character(iQuote, lChars)
-        and token_is_not_a_parenthesis(iQuote, lChars)
-    )
+    return there_is_a_single_token_between_quotes(iIndex, lQuotes) and token_between_quotes_is_a_single_character(iQuote, lChars)
 
 
 def there_is_a_single_token_between_quotes(iIndex, lQuotes):
@@ -239,19 +235,40 @@ def token_between_quotes_is_a_single_character(iQuote, lChars):
     return len(lChars[iQuote + 1]) == 1
 
 
-def token_is_not_a_parenthesis(iQuote, lChars):
-    return lChars[iQuote + 1] != "("
-
-
-def filter_character_literal_candidates(lLiterals):
+def filter_character_literal_candidates(lCandidates):
     lReturn = []
-    for iIndex, lLiteral in enumerate(lLiterals[0:-1]):
-        lNextLiteral = lLiterals[iIndex + 1]
-        lPreviousLiteral = lLiterals[iIndex - 1]
-        if lLiteral[1] == lNextLiteral[0] and lLiteral[0] == lPreviousLiteral[1]:
-            continue
-        lReturn.append(lLiteral)
-    lReturn.append(lLiterals[-1])
+    lSequentialCandidates = []
+    for iIndex, lCandidate in enumerate(lCandidates):
+        # The algorithm is a bit more complex than one might expect because it needs to be able to handle sequences of
+        # character literals separated by a single character, e.g. `'1','0','a'`, as well as character literals inside
+        # qualified expressions, e.g. std_logic'('1'), both of which include "red herring" candidates.
+        # First, build up a sequence of sequential candidates, i.e. candidates whose end equals the start of the next
+        # candidate. Most of the time, this sequence will contain only one candidate.
+        lSequentialCandidates.append(lCandidate)
+
+        bCandidateIsLast = iIndex == len(lCandidates) - 1
+        if bCandidateIsLast:
+            bCandidateIsLastInSequence = True
+        else:
+            lNextLiteral = lCandidates[iIndex + 1]
+            bCandidateIsLastInSequence = lCandidate[1] != lNextLiteral[0]
+
+        if bCandidateIsLastInSequence:
+            # At the end of a sequence, filter the candidates to find the character literals. Sequential candidates will
+            # alternate between valid and invalid candidates. For example, in `'1','0'`, the first candidate ('1') is
+            # valid, the second (',') is invalid, and the third ('0') is valid. The first in the sequence will always be
+            # valid unless a qualified expression is present. For example, in `std_logic'('1')`, the first candidate
+            # ('(') is invalid and the second candidate ('1') is valid. If there is a qualified expression, the number
+            # of candidates will be even; otherwise the number will be odd.
+            # Therefore, filter by selecting every second candidate, starting with 0 if the number of candidates is odd
+            # and starting with 1 if the number of candidates is even.
+            iSequenceStart = (len(lSequentialCandidates) + 1) % 2
+            lFilteredLiterals = [lSequentialCandidates[x] for x in range(iSequenceStart, len(lSequentialCandidates), 2)]
+            lReturn.extend(lFilteredLiterals)
+
+            # Clear the sequential candidates for the next sequence.
+            lSequentialCandidates = []
+
     return lReturn