diff --git a/pyproject.toml b/pyproject.toml index d806dd132..8bc80fa77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ passenv=ALDEC_LICENSE_FILE deps= fmt: black + typing-extensions pytest lint: pycodestyle lint: pylint diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..66f7dbefd --- /dev/null +++ b/setup.cfg @@ -0,0 +1,44 @@ +[metadata] +name = vunit_hdl +version = attr: vunit.about.version +author = Lars Asplund +author_email = lars.anders.asplund@gmail.com +license = Mozilla Public License 2.0 (MPL 2.0) +description = VUnit is an open source unit testing framework for VHDL/SystemVerilog. +long_description = attr: vunit.about.doc +url = https://github.com/VUnit/vunit +classifiers = + Development Status :: 5 - Production/Stable + License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) + Natural Language :: English + Intended Audience :: Developers + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Operating System :: Microsoft :: Windows + Operating System :: MacOS :: MacOS X + Operating System :: POSIX :: Linux + Topic :: Software Development :: Testing + Topic :: Scientific/Engineering :: Electronic Design Automation (EDA) + +[options] +zip_safe = False +include_package_data = True +python_requires = >=3.7 +packages = + tests + tests.lint + tests.unit + tests.acceptance + vunit + vunit.com + vunit.parsing + vunit.parsing.verilog + vunit.sim_if + vunit.test + vunit.ui + vunit.vivado +install_requires = + colorama + typing-extensions diff --git a/setup.py b/setup.py index edca6d9a4..e2bec06b8 100644 --- a/setup.py +++ b/setup.py @@ -10,22 +10,29 @@ import os import sys -from pathlib import Path +from glob import glob from logging import warning +from pathlib import Path +from typing import List, Optional + from setuptools import setup # Ensure that the source tree is on the sys path sys.path.insert(0, str(Path(__file__).parent.resolve())) -from vunit.about import version, doc # pylint: disable=wrong-import-position -from vunit.builtins import osvvm_is_installed # pylint: disable=wrong-import-position + +def osvvm_is_installed() -> bool: + """ + Checks if OSVVM is installed within the VUnit directory structure + """ + return len(glob(str(Path(__file__) / "vunit" / "vhdl" / "osvvm" / "*.vhd"))) != 0 -def find_all_files(directory, endings=None): +def find_all_files(directory: str, endings: Optional[List[str]] = None) -> List[str]: """ Recursively find all files within directory """ - result = [] + result: List[str] = [] for root, _, filenames in os.walk(directory): for filename in filenames: ending = os.path.splitext(filename)[-1] @@ -34,55 +41,14 @@ def find_all_files(directory, endings=None): return result -DATA_FILES = [] +DATA_FILES: List[str] = [] DATA_FILES += find_all_files("vunit", endings=[".tcl"]) DATA_FILES += find_all_files(str(Path("vunit") / "vhdl")) DATA_FILES += find_all_files(str(Path("vunit") / "verilog"), endings=[".v", ".sv", ".svh"]) DATA_FILES = [os.path.relpath(file_name, "vunit") for file_name in DATA_FILES] setup( - name="vunit_hdl", - version=version(), - packages=[ - "tests", - "tests.lint", - "tests.unit", - "tests.acceptance", - "vunit", - "vunit.com", - "vunit.parsing", - "vunit.parsing.verilog", - "vunit.sim_if", - "vunit.test", - "vunit.ui", - "vunit.vivado", - ], package_data={"vunit": DATA_FILES}, - zip_safe=False, - url="https://github.com/VUnit/vunit", - classifiers=[ - "Development Status :: 5 - 
Production/Stable", - "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", - "Natural Language :: English", - "Intended Audience :: Developers", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Operating System :: Microsoft :: Windows", - "Operating System :: MacOS :: MacOS X", - "Operating System :: POSIX :: Linux", - "Topic :: Software Development :: Testing", - "Topic :: Scientific/Engineering :: Electronic Design Automation (EDA)", - ], - python_requires=">=3.6", - install_requires=["colorama"], - requires=["colorama"], - license="Mozilla Public License 2.0 (MPL 2.0)", - author="Lars Asplund", - author_email="lars.anders.asplund@gmail.com", - description="VUnit is an open source unit testing framework for VHDL/SystemVerilog.", - long_description=doc(), ) if not osvvm_is_installed(): diff --git a/tests/unit/test_verilog_parser.py b/tests/unit/test_verilog_parser.py index a78f36321..5dc1955bb 100644 --- a/tests/unit/test_verilog_parser.py +++ b/tests/unit/test_verilog_parser.py @@ -8,6 +8,7 @@ Test of the Verilog parser """ +from typing import Optional from unittest import TestCase, mock import os from pathlib import Path @@ -15,6 +16,7 @@ import shutil from vunit.ostools import renew_path from vunit.parsing.verilog.parser import VerilogParser +from vunit.parsing.verilog.preprocess import Defines, IncludePaths class TestVerilogParser(TestCase): # pylint: disable=too-many-public-methods @@ -377,7 +379,7 @@ def test_cached_parsing_updated_by_other_defines(self): self.assertEqual(len(result.modules), 1) self.assertEqual(result.modules[0].name, "mod2") - def write_file(self, file_name, contents): + def write_file(self, file_name: str, contents: str) -> None: """ Write file with contents into output path """ @@ -388,7 +390,9 @@ def write_file(self, file_name, contents): with full_name.open("w") as fptr: fptr.write(contents) - def parse(self, code, include_paths=None, cache=None, defines=None): + def parse( + self, code: str, include_paths: Optional[IncludePaths] = None, cache=None, defines: Optional[Defines] = None + ): """ Helper function to parse """ diff --git a/tests/unit/test_verilog_preprocessor.py b/tests/unit/test_verilog_preprocessor.py index 2996f8c53..d6d998ca1 100644 --- a/tests/unit/test_verilog_preprocessor.py +++ b/tests/unit/test_verilog_preprocessor.py @@ -14,10 +14,11 @@ from pathlib import Path import os +from typing import Dict, List, Optional from unittest import TestCase, mock import shutil from vunit.ostools import renew_path, write_file -from vunit.parsing.verilog.preprocess import VerilogPreprocessor, Macro +from vunit.parsing.verilog.preprocess import Defines, IncludePaths, IncludedFiles, VerilogPreprocessor, Macro from vunit.parsing.verilog.tokenizer import VerilogTokenizer from vunit.parsing.tokenizer import Token @@ -811,7 +812,9 @@ def test_ignores_protected_region(self): result.assert_has_tokens("keep_before\n\nkeep_end") result.assert_no_log() - def preprocess(self, code, file_name="fn.v", include_paths=None): + def preprocess( + self, code: str, file_name: str = "fn.v", include_paths: Optional[IncludePaths] = None + ) -> "PreprocessResult": """ Tokenize & Preprocess """ @@ -819,8 +822,8 @@ def preprocess(self, code, file_name="fn.v", include_paths=None): preprocessor = VerilogPreprocessor(tokenizer) write_file(file_name, code) tokens = tokenizer.tokenize(code, file_name=file_name) - defines = {} - included_files = [] + 
defines: Defines = {} + included_files: IncludedFiles = [] with mock.patch("vunit.parsing.verilog.preprocess.LOGGER", autospec=True) as logger: tokens = preprocessor.preprocess(tokens, defines, include_paths, included_files) return PreprocessResult( @@ -831,7 +834,7 @@ def preprocess(self, code, file_name="fn.v", include_paths=None): logger, ) - def write_file(self, file_name, contents): + def write_file(self, file_name: str, contents: str) -> None: """ Write file with contents into output path """ @@ -843,18 +846,24 @@ def write_file(self, file_name, contents): fptr.write(contents) -class PreprocessResult(object): +class PreprocessResult: """ Helper object to test preprocessing """ + test: TestCase + tokens: List[Token] + defines: Dict[str, Macro] + included_files: List[str] + logger: mock.Mock + def __init__( self, # pylint: disable=too-many-arguments - test, - tokens, - defines, - included_files, - logger, + test: TestCase, + tokens: List[Token], + defines: Defines, + included_files: List[str], + logger: mock.Mock, ): self.test = test self.tokens = tokens @@ -862,7 +871,7 @@ def __init__( self.included_files = included_files self.logger = logger - def assert_has_tokens(self, code, noloc=True): + def assert_has_tokens(self, code: str, noloc: bool = True): """ Check that tokens are the same as code """ @@ -880,19 +889,19 @@ def assert_no_defines(self): """ self.test.assertEqual(self.defines, {}) - def assert_included_files(self, included_files): + def assert_included_files(self, included_files: List[str]) -> None: """ Assert that these files where included """ self.test.assertEqual(self.included_files, included_files) - def assert_has_defines(self, defines): + def assert_has_defines(self, defines: Defines) -> None: """ Assert that these defines were made """ self.test.assertEqual(self.defines.keys(), defines.keys()) - def macro_strip_loc(define): + def macro_strip_loc(define: Macro) -> None: """ Strip location information from a Macro """ @@ -903,7 +912,7 @@ def macro_strip_loc(define): for key in self.defines: self.test.assertEqual(macro_strip_loc(self.defines[key]), macro_strip_loc(defines[key])) - def assert_no_log(self): + def assert_no_log(self) -> None: """ Assert that no log call were made """ @@ -913,7 +922,7 @@ def assert_no_log(self): self.test.assertEqual(self.logger.error.mock_calls, []) -def tokenize(code, file_name="fn.v"): +def tokenize(code: str, file_name: str = "fn.v") -> List[Token]: """ Tokenize """ @@ -921,7 +930,7 @@ def tokenize(code, file_name="fn.v"): return tokenizer.tokenize(code, file_name=file_name) -def strip_loc(tokens): +def strip_loc(tokens: List[Token]) -> List[Token]: """ Strip location information """ diff --git a/tests/unit/test_verilog_tokenizer.py b/tests/unit/test_verilog_tokenizer.py index 308162504..13fbcc8f0 100644 --- a/tests/unit/test_verilog_tokenizer.py +++ b/tests/unit/test_verilog_tokenizer.py @@ -11,29 +11,11 @@ Test of the Verilog tokenizer """ +from typing import List from unittest import TestCase +from vunit.parsing.tokenizer import Token from vunit.parsing.verilog.tokenizer import VerilogTokenizer -from vunit.parsing.verilog.tokens import ( - COMMA, - COMMENT, - ENDMODULE, - ENDPACKAGE, - EQUAL, - HASH, - IDENTIFIER, - IMPORT, - LPAR, - MODULE, - MULTI_COMMENT, - NEWLINE, - PACKAGE, - PARAMETER, - PREPROCESSOR, - RPAR, - SEMI_COLON, - STRING, - WHITESPACE, -) +from vunit.parsing.verilog.tokens import TokenKind, KeywordKind class TestVerilogTokenizer(TestCase): @@ -45,9 +27,9 @@ def test_tokenizes_define(self): self.check( "`define 
name", [ - PREPROCESSOR(value="define"), - WHITESPACE(value=" "), - IDENTIFIER(value="name"), + Token(TokenKind.PREPROCESSOR, value="define"), + Token(TokenKind.WHITESPACE, value=" "), + Token(TokenKind.IDENTIFIER, value="name"), ], ) @@ -55,93 +37,95 @@ def test_newline_is_not_whitespace(self): self.check( " \n \n\n", [ - WHITESPACE(value=" "), - NEWLINE(), - WHITESPACE(value=" "), - NEWLINE(), - NEWLINE(), + Token(TokenKind.WHITESPACE, value=" "), + Token(TokenKind.NEWLINE), + Token(TokenKind.WHITESPACE, value=" "), + Token(TokenKind.NEWLINE), + Token(TokenKind.NEWLINE), ], ) def test_tokenizes_string_literal(self): - self.check('"hello"', [STRING(value="hello")]) + self.check('"hello"', [Token(TokenKind.STRING, value="hello")]) - self.check('"hel""lo"', [STRING(value="hel"), STRING(value="lo")]) + self.check('"hel""lo"', [Token(TokenKind.STRING, value="hel"), Token(TokenKind.STRING, value="lo")]) - self.check(r'"h\"ello"', [STRING(value='h"ello')]) + self.check(r'"h\"ello"', [Token(TokenKind.STRING, value='h"ello')]) - self.check(r'"h\"ello"', [STRING(value='h"ello')]) + self.check(r'"h\"ello"', [Token(TokenKind.STRING, value='h"ello')]) - self.check(r'"\"ello"', [STRING(value='"ello')]) + self.check(r'"\"ello"', [Token(TokenKind.STRING, value='"ello')]) - self.check(r'"\"\""', [STRING(value='""')]) + self.check(r'"\"\""', [Token(TokenKind.STRING, value='""')]) self.check( r'''"hi there"''', - [STRING(value="hi\nthere")], + [Token(TokenKind.STRING, value="hi\nthere")], ) self.check( r'''"hi\ there"''', - [STRING(value="hithere")], + [Token(TokenKind.STRING, value="hithere")], ) def test_tokenizes_single_line_comment(self): - self.check("// asd", [COMMENT(value=" asd")]) + self.check("// asd", [Token(TokenKind.COMMENT, value=" asd")]) - self.check("asd// asd", [IDENTIFIER(value="asd"), COMMENT(value=" asd")]) + self.check("asd// asd", [Token(TokenKind.IDENTIFIER, value="asd"), Token(TokenKind.COMMENT, value=" asd")]) - self.check("asd// asd //", [IDENTIFIER(value="asd"), COMMENT(value=" asd //")]) + self.check( + "asd// asd //", [Token(TokenKind.IDENTIFIER, value="asd"), Token(TokenKind.COMMENT, value=" asd //")] + ) def test_tokenizes_multi_line_comment(self): - self.check("/* asd */", [MULTI_COMMENT(value=" asd ")]) + self.check("/* asd */", [Token(TokenKind.MULTI_COMMENT, value=" asd ")]) - self.check("/* /* asd */", [MULTI_COMMENT(value=" /* asd ")]) + self.check("/* /* asd */", [Token(TokenKind.MULTI_COMMENT, value=" /* asd ")]) - self.check("/* /* asd */", [MULTI_COMMENT(value=" /* asd ")]) + self.check("/* /* asd */", [Token(TokenKind.MULTI_COMMENT, value=" /* asd ")]) - self.check("/* 1 \n 2 */", [MULTI_COMMENT(value=" 1 \n 2 ")]) + self.check("/* 1 \n 2 */", [Token(TokenKind.MULTI_COMMENT, value=" 1 \n 2 ")]) - self.check("/* 1 \r\n 2 */", [MULTI_COMMENT(value=" 1 \r\n 2 ")]) + self.check("/* 1 \r\n 2 */", [Token(TokenKind.MULTI_COMMENT, value=" 1 \r\n 2 ")]) def test_tokenizes_semi_colon(self): - self.check("asd;", [IDENTIFIER(value="asd"), SEMI_COLON(value="")]) + self.check("asd;", [Token(TokenKind.IDENTIFIER, value="asd"), Token(TokenKind.SEMI_COLON, value="")]) def test_tokenizes_newline(self): - self.check("asd\n", [IDENTIFIER(value="asd"), NEWLINE(value="")]) + self.check("asd\n", [Token(TokenKind.IDENTIFIER, value="asd"), Token(TokenKind.NEWLINE, value="")]) def test_tokenizes_comma(self): - self.check(",", [COMMA(value="")]) + self.check(",", [Token(TokenKind.COMMA, value="")]) def test_tokenizes_parenthesis(self): - self.check("()", [LPAR(value=""), RPAR(value="")]) + 
self.check("()", [Token(TokenKind.LPAR, value=""), Token(TokenKind.RPAR, value="")]) def test_tokenizes_hash(self): - self.check("#", [HASH(value="")]) + self.check("#", [Token(TokenKind.HASH, value="")]) def test_tokenizes_equal(self): - self.check("=", [EQUAL(value="")]) + self.check("=", [Token(TokenKind.EQUAL, value="")]) def test_escaped_newline_ignored(self): - self.check("a\\\nb", [IDENTIFIER(value="a"), IDENTIFIER(value="b")]) + self.check("a\\\nb", [Token(TokenKind.IDENTIFIER, value="a"), Token(TokenKind.IDENTIFIER, value="b")]) def test_tokenizes_keywords(self): - self.check("module", [MODULE(value="module")]) - self.check("endmodule", [ENDMODULE(value="endmodule")]) - self.check("package", [PACKAGE(value="package")]) - self.check("endpackage", [ENDPACKAGE(value="endpackage")]) - self.check("parameter", [PARAMETER(value="parameter")]) - self.check("import", [IMPORT(value="import")]) + self.check("module", [Token(KeywordKind.MODULE, value="module")]) + self.check("endmodule", [Token(KeywordKind.ENDMODULE, value="endmodule")]) + self.check("package", [Token(KeywordKind.PACKAGE, value="package")]) + self.check("endpackage", [Token(KeywordKind.ENDPACKAGE, value="endpackage")]) + self.check("parameter", [Token(KeywordKind.PARAMETER, value="parameter")]) + self.check("import", [Token(KeywordKind.IMPORT, value="import")]) def test_has_location_information(self): self.check( "`define foo", [ - PREPROCESSOR(value="define", location=(("fn.v", (0, 6)), None)), - WHITESPACE(value=" ", location=(("fn.v", (7, 7)), None)), - IDENTIFIER(value="foo", location=(("fn.v", (8, 10)), None)), + Token(TokenKind.PREPROCESSOR, value="define", location=(("fn.v", (0, 6)), None)), + Token(TokenKind.WHITESPACE, value=" ", location=(("fn.v", (7, 7)), None)), + Token(TokenKind.IDENTIFIER, value="foo", location=(("fn.v", (8, 10)), None)), ], strip_loc=False, ) @@ -149,16 +133,16 @@ def test_has_location_information(self): def setUp(self): self.tokenizer = VerilogTokenizer() - def check(self, code, tokens, strip_loc=True): + def check(self, code: str, tokens: List[Token], strip_loc: bool = True): """ Helper method to test tokenizer Tokenize code and check that it matches tokens optionally strip location information in comparison """ - def preprocess(tokens): # pylint: disable=missing-docstring + def preprocess(tokens: List[Token]): # pylint: disable=missing-docstring if strip_loc: - return [token.kind(token.value, None) for token in tokens] + return [Token(token.kind, token.value) for token in tokens] return tokens diff --git a/vunit/parsing/tokenizer.py b/vunit/parsing/tokenizer.py index bd9655495..b8437f6cd 100644 --- a/vunit/parsing/tokenizer.py +++ b/vunit/parsing/tokenizer.py @@ -8,48 +8,41 @@ A general tokenizer """ -import collections +from dataclasses import dataclass +import logging import re +from typing import Callable, Dict, List, Optional, Tuple, Union +from typing_extensions import Self, Literal from vunit.ostools import read_file, file_exists, simplify_path +from typing_extensions import TypeAlias +from vunit.parsing.verilog.tokens import KeywordKind, TokenKind -TokenType = collections.namedtuple("TokenType", ["kind", "value", "location"]) +Location: TypeAlias = Tuple[Tuple[Optional[str], Tuple[int, int]], Optional["Location"]] -def Token(kind, value="", location=None): # pylint: disable=invalid-name - return TokenType(kind, value, location) +@dataclass(frozen=True) +class Token: + kind: Union[TokenKind, KeywordKind] + value: str = "" + location: Optional[Location] = None -class TokenKind: - pass - 
- -def new_token_kind(name: str) -> TokenKind: - """ - Create a new token kind with nice __repr__ - """ - - def new_token(kind, value="", location=None): - """ - Create new token of kind - """ - return Token(kind, value, location) - - cls = type(name, (object,), {"__repr__": lambda self: name, "__call__": new_token}) - return cls() - - -class Tokenizer(object): +class Tokenizer: """ Maintain a prioritized list of token regex """ + _regexs: List[Tuple[str, str]] + _assoc: Dict[str, Tuple[TokenKind, Optional[Callable[[Token], Optional[Token]]]]] + _regex: Optional[re.Pattern[str]] + def __init__(self): self._regexs = [] self._assoc = {} self._regex = None - def add(self, kind, regex, func=None): + def add(self, kind: TokenKind, regex: str, func: Optional[Callable[[Token], Optional[Token]]] = None) -> TokenKind: """ Add token type """ @@ -58,24 +51,33 @@ def add(self, kind, regex, func=None): self._assoc[key] = (kind, func) return kind - def finalize(self): + def finalize(self) -> None: self._regex = re.compile( "|".join(f"(?P<{spec[0]!s}>{spec[1]!s})" for spec in self._regexs), re.VERBOSE | re.MULTILINE, ) - def tokenize(self, code, file_name=None, previous_location=None, create_locations=False): + def tokenize( + self, + code: str, + file_name: Optional[str] = None, + previous_location: Optional[Location] = None, + create_locations: bool = False, + ) -> List[Token]: """ Tokenize the code """ - tokens = [] + tokens: List[Token] = [] start = 0 + + assert self._regex is not None while True: match = self._regex.search(code, pos=start) if match is None: break lexpos = (start, match.end() - 1) start = match.end() + assert match.lastgroup is not None kind, func = self._assoc[match.lastgroup] value = match.group(match.lastgroup) @@ -93,37 +95,40 @@ def tokenize(self, code, file_name=None, previous_location=None, create_location return tokens -class TokenStream(object): +class TokenStream: """ Helper class for traversing a stream of tokens """ - def __init__(self, tokens): + _tokens: List[Token] + _idx: int + + def __init__(self, tokens: List[Token]): self._tokens = tokens self._idx = 0 def __len__(self): return len(self._tokens) - def __getitem__(self, index): + def __getitem__(self, index: int) -> Token: return self._tokens[index] @property - def eof(self): + def eof(self) -> bool: return not self._idx < len(self._tokens) @property - def idx(self): + def idx(self) -> int: return self._idx @property - def current(self): + def current(self) -> Token: return self._tokens[self._idx] - def peek(self, offset=0): + def peek(self, offset: int = 0) -> Token: return self._tokens[self._idx + offset] - def skip_while(self, *kinds): + def skip_while(self, *kinds: TokenKind) -> int: """ Skip forward while token kind is present """ @@ -133,7 +138,7 @@ def skip_while(self, *kinds): self._idx += 1 return self._idx - def skip_until(self, *kinds): + def skip_until(self, *kinds: TokenKind) -> int: """ Skip forward until token kind is present """ @@ -143,7 +148,7 @@ def skip_until(self, *kinds): self._idx += 1 return self._idx - def pop(self): + def pop(self) -> Token: """ Return current token and advance stream """ @@ -153,7 +158,7 @@ def pop(self): self._idx += 1 return self._tokens[self._idx - 1] - def expect(self, *kinds): + def expect(self, *kinds: TokenKind) -> Token: """ Expect to pop token with any of kinds """ @@ -163,11 +168,11 @@ def expect(self, *kinds): raise LocationException.error(f"Expected {expected!s} got {token.kind!s}", token.location) return token - def slice(self, start, end): + def slice(self, 
start: int, end: int) -> List[Token]: return self._tokens[start:end] -def describe_location(location, first=True): +def describe_location(location: Optional[Location], first: bool = True) -> str: """ Describe the location as a string """ @@ -221,25 +226,25 @@ class LocationException(Exception): """ @classmethod - def error(cls, message, location): + def error(cls, message: str, location: Optional[Location]) -> Self: return cls(message, location, "error") @classmethod - def warning(cls, message, location): + def warning(cls, message: str, location: Optional[Location]) -> Self: return cls(message, location, "warning") @classmethod - def debug(cls, message, location): + def debug(cls, message: str, location: Optional[Location]) -> Self: return cls(message, location, "debug") - def __init__(self, message, location, severity): - Exception.__init__(self) + def __init__(self, message: str, location: Optional[Location], severity: Literal["debug", "warning", "error"]): + super().__init__(self) assert severity in ("debug", "warning", "error") self._severtity = severity self._message = message self._location = location - def log(self, logger): + def log(self, logger: logging.Logger) -> None: """ Log the exception """ @@ -253,7 +258,7 @@ def log(self, logger): method(self._message + "\n%s", describe_location(self._location)) -def add_previous(location, previous): +def add_previous(location: Optional[Location], previous: Optional[Location]) -> Optional[Location]: """ Add previous location """ @@ -264,7 +269,7 @@ def add_previous(location, previous): return (current, add_previous(old_previous, previous)) -def strip_previous(location): +def strip_previous(location: Optional[Location]) -> Optional[Tuple[Optional[str], Tuple[int, int]]]: """ Strip previous location """ diff --git a/vunit/parsing/verilog/parser.py b/vunit/parsing/verilog/parser.py index 89432cec7..59ae809c7 100644 --- a/vunit/parsing/verilog/parser.py +++ b/vunit/parsing/verilog/parser.py @@ -13,50 +13,45 @@ import logging from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple +from typing_extensions import Self from vunit.ostools import read_file from vunit.parsing.encodings import HDL_FILE_ENCODING -from vunit.parsing.tokenizer import TokenStream, EOFException, LocationException +from vunit.parsing.tokenizer import Token, TokenStream, EOFException, LocationException from vunit.parsing.verilog.tokenizer import VerilogTokenizer from vunit.parsing.verilog.preprocess import ( + Defines, + IncludePaths, + IncludedFiles, VerilogPreprocessor, find_included_file, Macro, ) -from vunit.parsing.verilog.tokens import ( - BEGIN, - COLON, - COMMENT, - END, - ENDMODULE, - DOUBLE_COLON, - HASH, - IDENTIFIER, - IMPORT, - MODULE, - MULTI_COMMENT, - NEWLINE, - PACKAGE, - PARAMETER, - SEMI_COLON, - WHITESPACE, -) +from vunit.parsing.verilog.tokens import KeywordKind, TokenKind from vunit.cached import file_content_hash LOGGER = logging.getLogger(__name__) -class VerilogParser(object): +class VerilogParser: """ Parse a single Verilog file """ - def __init__(self, database=None): + _tokenizer: VerilogTokenizer + _preprocessor: VerilogPreprocessor + _database: Any + _content_cache: Dict[str, str] + + def __init__(self, database: Optional[Any] = None): self._tokenizer = VerilogTokenizer() self._preprocessor = VerilogPreprocessor(self._tokenizer) self._database = database self._content_cache = {} - def parse(self, file_name, include_paths=None, defines=None): + def parse( + self, file_name: str, include_paths: Optional[List[str]] = None, 
defines: Optional[Dict[str, Macro]] = None + ) -> "VerilogDesignFile": """ Parse verilog code """ @@ -90,13 +85,15 @@ def parse(self, file_name, include_paths=None, defines=None): return result @staticmethod - def _key(file_name): + def _key(file_name: str) -> bytes: """ Returns the database key for parse results of file_name """ return f"CachedVerilogParser.parse({str(Path(file_name).resolve)})".encode() - def _store_result(self, file_name, result, included_files, defines): + def _store_result( + self, file_name: str, result: "VerilogDesignFile", included_files: List[Tuple[str, str]], defines: Defines + ) -> "VerilogDesignFile": """ Store parse result into back into cache """ @@ -113,7 +110,7 @@ def _store_result(self, file_name, result, included_files, defines): ) return result - def _content_hash(self, file_name): + def _content_hash(self, file_name: Optional[str]) -> Optional[str]: """ Hash the contents of the file """ @@ -125,7 +122,9 @@ def _content_hash(self, file_name): ) return self._content_cache[file_name] - def _lookup_parse_cache(self, file_name, include_paths, defines): + def _lookup_parse_cache( + self, file_name: str, include_paths: IncludePaths, defines: Defines + ) -> "Optional[VerilogDesignFile]": """ Use verilog code from cache """ @@ -157,19 +156,26 @@ def _lookup_parse_cache(self, file_name, include_paths, defines): return old_result -class VerilogDesignFile(object): +class VerilogDesignFile: """ - Contains Verilog objecs found within a file + Contains Verilog objects found within a file """ + modules: "List[VerilogModule]" + packages: "List[VerilogPackage]" + imports: List[str] + package_references: List[str] + instances: List[str] + included_files: IncludedFiles + def __init__( # pylint: disable=too-many-arguments self, - modules=None, - packages=None, - imports=None, - package_references=None, - instances=None, - included_files=None, + modules: "Optional[List[VerilogModule]]" = None, + packages: "Optional[List[VerilogPackage]]" = None, + imports: Optional[List[str]] = None, + package_references: Optional[List[str]] = None, + instances: Optional[List[str]] = None, + included_files: Optional[IncludedFiles] = None, ): self.modules = [] if modules is None else modules self.packages = [] if packages is None else packages @@ -179,11 +185,15 @@ def __init__( # pylint: disable=too-many-arguments self.included_files = [] if included_files is None else included_files @classmethod - def parse(cls, tokens, included_files): + def parse(cls, tokens: List[Token], included_files: IncludedFiles) -> Self: """ Parse verilog file """ - tokens = [token for token in tokens if token.kind not in (WHITESPACE, COMMENT, NEWLINE, MULTI_COMMENT)] + tokens = [ + token + for token in tokens + if token.kind not in (TokenKind.WHITESPACE, TokenKind.COMMENT, TokenKind.NEWLINE, TokenKind.MULTI_COMMENT) + ] return cls( modules=VerilogModule.find(tokens), packages=VerilogPackage.find(tokens), @@ -194,21 +204,21 @@ def parse(cls, tokens, included_files): ) @staticmethod - def find_imports(tokens): + def find_imports(tokens: List[Token]) -> List[str]: """ Find imports """ - results = [] + results: List[str] = [] stream = TokenStream(tokens) while not stream.eof: token = stream.pop() - if token.kind != IMPORT: + if token.kind != KeywordKind.IMPORT: continue import_token = token try: token = stream.pop() - if token.kind == IDENTIFIER: + if token.kind == TokenKind.IDENTIFIER: results.append(token.value) else: LocationException.warning("import bad argument", token.location).log(LOGGER) @@ -217,41 +227,41 @@ 
def find_imports(tokens): return results @staticmethod - def find_package_references(tokens): + def find_package_references(tokens: List[Token]) -> List[str]: """ Find package_references pkg::func """ - results = [] + results: List[str] = [] stream = TokenStream(tokens) while not stream.eof: token = stream.pop() - if token.kind == IMPORT: - stream.skip_until(SEMI_COLON) + if token.kind == KeywordKind.IMPORT: + stream.skip_until(TokenKind.SEMI_COLON) if not stream.eof: stream.pop() - elif token.kind == IDENTIFIER and not stream.eof: + elif token.kind == TokenKind.IDENTIFIER and not stream.eof: kind = stream.pop().kind - if kind == DOUBLE_COLON: + if kind == TokenKind.DOUBLE_COLON: results.append(token.value) - stream.skip_while(IDENTIFIER, DOUBLE_COLON) + stream.skip_while(TokenKind.IDENTIFIER, TokenKind.DOUBLE_COLON) return results @staticmethod - def find_instances(tokens): + def find_instances(tokens: List[Token]) -> List[str]: """ Find module instances """ - results = [] + results: List[str] = [] stream = TokenStream(tokens) while not stream.eof: token = stream.pop() - if token.kind in (BEGIN, END): + if token.kind in (KeywordKind.BEGIN, KeywordKind.END): _parse_block_label(stream) continue - if not token.kind == IDENTIFIER: + if not token.kind == TokenKind.IDENTIFIER: continue modulename = token.value @@ -260,72 +270,75 @@ def find_instances(tokens): except EOFException: continue - if token.kind == HASH: + if token.kind == TokenKind.HASH: results.append(modulename) - elif token.kind == IDENTIFIER: + elif token.kind == TokenKind.IDENTIFIER: results.append(modulename) return results -def _parse_block_label(stream): +def _parse_block_label(stream: TokenStream) -> None: """ Parse a optional block label after begin|end keyword """ try: token = stream.peek() - if token.kind != COLON: + if token.kind != TokenKind.COLON: # Is not block label return stream.pop() - stream.expect(IDENTIFIER) + stream.expect(TokenKind.IDENTIFIER) except EOFException: return -class VerilogModule(object): +class VerilogModule: """ A verilog module """ - def __init__(self, name, parameters): + name: str + parameters: List[str] + + def __init__(self, name: str, parameters: List[str]): self.name = name self.parameters = parameters @classmethod - def parse_parameter(cls, idx, tokens): + def parse_parameter(cls, idx: int, tokens: List[Token]) -> Optional[str]: """ Parse parameter at point """ - if not tokens[idx].kind == PARAMETER: + if not tokens[idx].kind == KeywordKind.PARAMETER: return None - if tokens[idx + 2].kind == IDENTIFIER: + if tokens[idx + 2].kind == TokenKind.IDENTIFIER: return tokens[idx + 2].value return tokens[idx + 1].value @classmethod - def find(cls, tokens): + def find(cls, tokens: List[Token]) -> List[Self]: """ Find all modules within code, nested modules are ignored """ idx = 0 - name = None + name = "" balance = 0 - results = [] - parameters = [] + results: List[Self] = [] + parameters: List[str] = [] while idx < len(tokens): - if tokens[idx].kind == MODULE: + if tokens[idx].kind == KeywordKind.MODULE: if balance == 0: name = tokens[idx + 1].value parameters = [] balance += 1 - elif tokens[idx].kind == ENDMODULE: + elif tokens[idx].kind == KeywordKind.ENDMODULE: balance -= 1 if balance == 0: results.append(cls(name, parameters)) @@ -339,23 +352,25 @@ def find(cls, tokens): return results -class VerilogPackage(object): +class VerilogPackage: """ A verilog package """ - def __init__(self, name): + name: str + + def __init__(self, name: str): self.name = name @classmethod - def find(cls, tokens): + 
def find(cls, tokens: List[Token]) -> List[Self]: """ Find all modules within code, nested modules are ignored """ idx = 0 - results = [] + results: List[Self] = [] while idx < len(tokens): - if tokens[idx].kind == PACKAGE: + if tokens[idx].kind == KeywordKind.PACKAGE: idx += 1 name = tokens[idx].value results.append(cls(name)) diff --git a/vunit/parsing/verilog/preprocess.py b/vunit/parsing/verilog/preprocess.py index 9a053dfb8..5fce3914f 100644 --- a/vunit/parsing/verilog/preprocess.py +++ b/vunit/parsing/verilog/preprocess.py @@ -10,9 +10,12 @@ """ Verilog parsing functionality """ +from typing import Dict, List, Optional, Set, Tuple +from typing_extensions import Self from pathlib import Path import logging from vunit.parsing.tokenizer import ( + Location, TokenStream, Token, add_previous, @@ -20,37 +23,169 @@ EOFException, LocationException, ) -from vunit.parsing.verilog.tokens import ( - COMMA, - EQUAL, - IDENTIFIER, - LBRACE, - LBRACKET, - LPAR, - NEWLINE, - PREPROCESSOR, - RBRACE, - RBRACKET, - RPAR, - STRING, - WHITESPACE, -) +from vunit.parsing.verilog.tokenizer import VerilogTokenizer +from vunit.parsing.verilog.tokens import TokenKind from vunit.ostools import read_file LOGGER = logging.getLogger(__name__) -class VerilogPreprocessor(object): +Defines = Dict[str, "Macro"] +IncludePaths = List[str] +IncludedFiles = List[Tuple[str, Optional[str]]] + + +class Macro: + """ + A `define macro with zero or more arguments + """ + + name: str + tokens: List[Token] + args: Tuple[str, ...] + defaults: Dict[str, List[Token]] + + def __init__( + self, + name: str, + tokens: Optional[List[Token]] = None, + args: Tuple[str, ...] = tuple(), + defaults: Optional[Dict[str, List[Token]]] = None, + ): + self.name = name + self.tokens = [] if tokens is None else tokens + self.args = args + self.defaults = {} if defaults is None else defaults + + @property + def num_args(self) -> int: + return len(self.args) + + def __repr__(self) -> str: + return f"Macro({self.name!r}, {self.tokens!r} {self.args!r}, {self.defaults!r})" + + def expand(self, values: List[List[Token]], previous: Optional[Location]) -> List[Token]: + """ + Expand macro with actual values, returns a list of expanded tokens + """ + tokens: List[Token] = [] + for token in self.tokens: + if token.kind == TokenKind.IDENTIFIER and token.value in self.args: + idx = self.args.index(token.value) + value = values[idx] + tokens += value + else: + tokens.append(token) + return [Token(tok.kind, tok.value, add_previous(tok.location, previous)) for tok in tokens] + + def __eq__(self, other: Self) -> bool: + return ( + (self.name == other.name) + and (self.tokens == other.tokens) + and (self.args == other.args) + and (self.defaults == other.defaults) + ) + + def expand_from_stream(self, token: Token, stream: TokenStream, previous: Optional[Location] = None) -> List[Token]: + """ + Expand macro consuming arguments from the stream + returns the expanded tokens + """ + if self.num_args == 0: + values: List[List[Token]] = [] + else: + try: + values = self._parse_macro_actuals(token, stream) + except EOFException as exe: + raise LocationException.warning( + "EOF reached when parsing `define actuals", location=token.location + ) from exe + + # Bind defaults + if len(values) < len(self.args): + for i in range(len(values), len(self.args)): + name = self.args[i] + if name in self.defaults: + values.append(self.defaults[name]) + else: + raise LocationException.warning(f"Missing value for argument {name!s}", token.location) + + elif len(values) > len(self.args): 
+ raise LocationException.warning( + f"Too many arguments got {len(values):d} expected {len(self.args):d}", + token.location, + ) + + return self.expand(values, previous) + + @staticmethod + def _parse_macro_actuals(define_token: Token, stream: TokenStream) -> List[List[Token]]: + """ + Parse the actual values of macro call such as + 1 2 in `macro(1, 2) + """ + + stream.skip_while(TokenKind.WHITESPACE) + + token = stream.pop() + if token.kind != TokenKind.LPAR: + raise LocationException.warning("Bad `define argument list", define_token.location) + token = stream.pop() + value: List[Token] = [] + values: List[List[Token]] = [] + + bracket_count = 0 + brace_count = 0 + par_count = 0 + + while not (token.kind == TokenKind.RPAR and par_count == 0): + if token.kind is TokenKind.LBRACKET: + bracket_count += 1 + elif token.kind is TokenKind.RBRACKET: + bracket_count += -1 + elif token.kind is TokenKind.LBRACE: + brace_count += 1 + elif token.kind is TokenKind.RBRACE: + brace_count += -1 + elif token.kind is TokenKind.LPAR: + par_count += 1 + elif token.kind is TokenKind.RPAR: + par_count += -1 + + value_ok = token.kind == TokenKind.COMMA and bracket_count == 0 and brace_count == 0 and par_count == 0 + + if value_ok: + values.append(value) + value = [] + else: + value.append(token) + token = stream.pop() + + values.append(value) + return values + + +class VerilogPreprocessor: """ A Verilog preprocessor """ - def __init__(self, tokenizer): + _tokenizer: VerilogTokenizer + _macro_trace: Set[Tuple[Tuple[str | None, Tuple[int, int]] | None, int]] + _include_trace: Set[Tuple[Tuple[str | None, Tuple[int, int]] | None, int]] + + def __init__(self, tokenizer: VerilogTokenizer): self._tokenizer = tokenizer self._macro_trace = set() self._include_trace = set() - def preprocess(self, tokens, defines=None, include_paths=None, included_files=None): + def preprocess( + self, + tokens: List[Token], + defines: Optional[Defines] = None, + include_paths: Optional[IncludePaths] = None, + included_files: Optional[IncludedFiles] = None, + ) -> List[Token]: """ Entry point of preprocessing """ @@ -58,7 +193,13 @@ def preprocess(self, tokens, defines=None, include_paths=None, included_files=No self._macro_trace = set() return self._preprocess(tokens, defines, include_paths, included_files) - def _preprocess(self, tokens, defines=None, include_paths=None, included_files=None): + def _preprocess( + self, + tokens: List[Token], + defines: Optional[Defines] = None, + include_paths: Optional[IncludePaths] = None, + included_files: Optional[IncludedFiles] = None, + ) -> List[Token]: """ Pre-process tokens while filling in defines """ @@ -66,11 +207,11 @@ def _preprocess(self, tokens, defines=None, include_paths=None, included_files=N include_paths = [] if include_paths is None else include_paths included_files = [] if included_files is None else included_files defines = {} if defines is None else defines - result = [] + result: List[Token] = [] while not stream.eof: token = stream.pop() - if not token.kind == PREPROCESSOR: + if not token.kind == TokenKind.PREPROCESSOR: result.append(token) continue @@ -82,8 +223,13 @@ def _preprocess(self, tokens, defines=None, include_paths=None, included_files=N return result def preprocessor( # pylint: disable=too-many-arguments,too-many-branches - self, token, stream, defines, include_paths, included_files - ): + self, + token: Token, + stream: TokenStream, + defines: Defines, + include_paths: IncludePaths, + included_files: IncludedFiles, + ) -> List[Token]: """ Handle preprocessor token 
""" @@ -119,14 +265,14 @@ def preprocessor( # pylint: disable=too-many-arguments,too-many-branches elif token.value in ("timescale", "default_nettype", "unconnected_drive"): # Ignore directive and arguments - stream.skip_until(NEWLINE) + stream.skip_until(TokenKind.NEWLINE) elif token.value == "pragma": - stream.skip_while(WHITESPACE) + stream.skip_while(TokenKind.WHITESPACE) pp_token = stream.pop() if pp_token.value == "protect": - stream.skip_while(WHITESPACE) + stream.skip_while(TokenKind.WHITESPACE) token = stream.pop() if token.value == "begin_protected": @@ -140,7 +286,7 @@ def preprocessor( # pylint: disable=too-many-arguments,too-many-branches return [] @staticmethod - def _skip_protected_region(stream): + def _skip_protected_region(stream: TokenStream) -> None: """ Skip a protected region `pragma protect begin_protected @@ -148,22 +294,27 @@ def _skip_protected_region(stream): `pragma protect end_protected """ while not stream.eof: - stream.skip_while(WHITESPACE) + stream.skip_while(TokenKind.WHITESPACE) token = stream.pop() - if token.kind == PREPROCESSOR and token.value == "pragma": - stream.skip_while(WHITESPACE) + if token.kind == TokenKind.PREPROCESSOR and token.value == "pragma": + stream.skip_while(TokenKind.WHITESPACE) token = stream.pop() if token.value == "protect": - stream.skip_while(WHITESPACE) + stream.skip_while(TokenKind.WHITESPACE) token = stream.pop() if token.value == "end_protected": return def expand_macro( # pylint: disable=too-many-arguments - self, macro_token, stream, defines, include_paths, included_files + self, + macro_token: Token, + stream: TokenStream, + defines: Defines, + include_paths: IncludePaths, + included_files: IncludedFiles, ): """ Expand a macro @@ -189,20 +340,20 @@ def expand_macro( # pylint: disable=too-many-arguments return tokens @staticmethod - def if_statement(if_token, stream, defines): + def if_statement(if_token: Token, stream: TokenStream, defines: Defines) -> List[Token]: """ Handle if statement """ - def check_arg(if_token, arg): + def check_arg(if_token: Token, arg: Token) -> None: """ Check the define argument of an if statement """ - if arg.kind != IDENTIFIER: + if arg.kind != TokenKind.IDENTIFIER: raise LocationException.warning(f"Bad argument to `{if_token.value!s}", arg.location) - stream.skip_while(NEWLINE) + stream.skip_while(TokenKind.NEWLINE) - def determine_if_taken(if_token, arg): + def determine_if_taken(if_token: Token, arg: Token) -> bool: """ Determine if the branch was taken """ @@ -214,8 +365,8 @@ def determine_if_taken(if_token, arg): raise ValueError(f"Invalid if token {if_token.value!r}") - result = [] - stream.skip_while(WHITESPACE) + result: List[Token] = [] + stream.skip_while(TokenKind.WHITESPACE) arg = stream.pop() check_arg(if_token, arg) @@ -224,7 +375,7 @@ def determine_if_taken(if_token, arg): count = 1 while True: token = stream.pop() - if token.kind == PREPROCESSOR: + if token.kind == TokenKind.PREPROCESSOR: if token.value in ("ifdef", "ifndef"): count += 1 elif token.value == "endif": @@ -232,18 +383,18 @@ def determine_if_taken(if_token, arg): if count == 0: break - if count == 1 and (token.kind, token.value) == (PREPROCESSOR, "else"): - stream.skip_while(NEWLINE) + if count == 1 and (token.kind, token.value) == (TokenKind.PREPROCESSOR, "else"): + stream.skip_while(TokenKind.NEWLINE) if not any_taken: taken = True any_taken = True else: taken = False - elif count == 1 and (token.kind, token.value) == (PREPROCESSOR, "elsif"): - stream.skip_while(WHITESPACE) + elif count == 1 and (token.kind, 
token.value) == (TokenKind.PREPROCESSOR, "elsif"): + stream.skip_while(TokenKind.WHITESPACE) arg = stream.pop() check_arg(token, arg) - stream.skip_while(NEWLINE) + stream.skip_while(TokenKind.NEWLINE) if not any_taken: taken = determine_if_taken(token, arg) any_taken = taken @@ -251,20 +402,27 @@ def determine_if_taken(if_token, arg): taken = False elif taken: result.append(token) - stream.skip_while(NEWLINE) + stream.skip_while(TokenKind.NEWLINE) return result - def include(self, token, stream, include_paths, included_files, defines): # pylint: disable=too-many-arguments + def include( + self, + token: Token, + stream: TokenStream, + include_paths: IncludePaths, + included_files: IncludedFiles, + defines: Defines, + ) -> List[Token]: # pylint: disable=too-many-arguments """ Handle `include directive """ - stream.skip_while(WHITESPACE) + stream.skip_while(TokenKind.WHITESPACE) try: tok = stream.pop() except EOFException as exe: raise LocationException.warning("EOF reached when parsing `include argument", token.location) from exe - if tok.kind == PREPROCESSOR: + if tok.kind == TokenKind.PREPROCESSOR: if tok.value in defines: macro = defines[tok.value] else: @@ -279,12 +437,12 @@ def include(self, token, stream, include_paths, included_files, defines): # pyl tok.location, ) - if expanded_tokens[0].kind != STRING: + if expanded_tokens[0].kind != TokenKind.STRING: raise LocationException.warning("Verilog `include has bad argument", expanded_tokens[0].location) file_name_tok = expanded_tokens[0] - elif tok.kind == STRING: + elif tok.kind == TokenKind.STRING: file_name_tok = tok else: raise LocationException.warning("Verilog `include bad argument", tok.location) @@ -319,7 +477,7 @@ def include(self, token, stream, include_paths, included_files, defines): # pyl return included_tokens -def find_included_file(include_paths, file_name): +def find_included_file(include_paths: List[str], file_name: str) -> Optional[str]: """ Find the file to include given include_paths """ @@ -330,17 +488,17 @@ def find_included_file(include_paths, file_name): return None -def undef(undef_token, stream, defines): +def undef(undef_token: Token, stream: TokenStream, defines: Defines) -> None: """ Handles undef directive """ - stream.skip_while(WHITESPACE, NEWLINE) + stream.skip_while(TokenKind.WHITESPACE, TokenKind.NEWLINE) try: name_token = stream.pop() except EOFException as exe: raise LocationException.warning("EOF reached when parsing `undef", undef_token.location) from exe - if name_token.kind != IDENTIFIER: + if name_token.kind != TokenKind.IDENTIFIER: raise LocationException.warning("Bad argument to `undef", name_token.location) if name_token.value not in defines: @@ -349,17 +507,17 @@ def undef(undef_token, stream, defines): del defines[name_token.value] -def define(define_token, stream): +def define(define_token: Token, stream: TokenStream) -> Optional[Macro]: """ Handle a `define directive """ - stream.skip_while(WHITESPACE, NEWLINE) + stream.skip_while(TokenKind.WHITESPACE, TokenKind.NEWLINE) try: name_token = stream.pop() except EOFException as exe: raise LocationException.warning("Verilog `define without argument", define_token.location) from exe - if name_token.kind != IDENTIFIER: + if name_token.kind != TokenKind.IDENTIFIER: raise LocationException.warning("Verilog `define invalid name", name_token.location) name = name_token.value @@ -370,26 +528,26 @@ def define(define_token, stream): # Empty define return Macro(name) - if token.kind in (NEWLINE,): + if token.kind in (TokenKind.NEWLINE,): # Empty 
define return Macro(name) - if token.kind in (WHITESPACE,): + if token.kind in (TokenKind.WHITESPACE,): # Define without arguments args = tuple() defaults = {} - elif token.kind == LPAR: + elif token.kind == TokenKind.LPAR: lpar_token = token args = tuple() defaults = {} try: - while token.kind != RPAR: - if token.kind == IDENTIFIER: + while token.kind != TokenKind.RPAR: + if token.kind == TokenKind.IDENTIFIER: argname = token.value args = args + (argname,) token = stream.pop() - if token.kind == EQUAL: + if token.kind == TokenKind.EQUAL: token = stream.pop() defaults[argname] = [token] token = stream.pop() @@ -400,128 +558,9 @@ def define(define_token, stream): "EOF reached when parsing `define argument list", lpar_token.location ) from exe - stream.skip_while(WHITESPACE) + stream.skip_while(TokenKind.WHITESPACE) start = stream.idx - end = stream.skip_until(NEWLINE) + end = stream.skip_until(TokenKind.NEWLINE) if not stream.eof: stream.pop() return Macro(name, tokens=stream.slice(start, end), args=args, defaults=defaults) - - -class Macro(object): - """ - A `define macro with zero or more arguments - """ - - def __init__(self, name, tokens=None, args=tuple(), defaults=None): - self.name = name - self.tokens = [] if tokens is None else tokens - self.args = args - self.defaults = {} if defaults is None else defaults - - @property - def num_args(self): - return len(self.args) - - def __repr__(self): - return f"Macro({self.name!r}, {self.tokens!r} {self.args!r}, {self.defaults!r})" - - def expand(self, values, previous): - """ - Expand macro with actual values, returns a list of expanded tokens - """ - tokens = [] - for token in self.tokens: - if token.kind == IDENTIFIER and token.value in self.args: - idx = self.args.index(token.value) - value = values[idx] - tokens += value - else: - tokens.append(token) - return [Token(tok.kind, tok.value, add_previous(tok.location, previous)) for tok in tokens] - - def __eq__(self, other): - return ( - (self.name == other.name) - and (self.tokens == other.tokens) - and (self.args == other.args) - and (self.defaults == other.defaults) - ) - - def expand_from_stream(self, token, stream, previous=None): - """ - Expand macro consuming arguments from the stream - returns the expanded tokens - """ - if self.num_args == 0: - values = [] - else: - try: - values = self._parse_macro_actuals(token, stream) - except EOFException as exe: - raise LocationException.warning( - "EOF reached when parsing `define actuals", location=token.location - ) from exe - - # Bind defaults - if len(values) < len(self.args): - for i in range(len(values), len(self.args)): - name = self.args[i] - if name in self.defaults: - values.append(self.defaults[name]) - else: - raise LocationException.warning(f"Missing value for argument {name!s}", token.location) - - elif len(values) > len(self.args): - raise LocationException.warning( - f"Too many arguments got {len(values):d} expected {len(self.args):d}", - token.location, - ) - - return self.expand(values, previous) - - @staticmethod - def _parse_macro_actuals(define_token, stream): - """ - Parse the actual values of macro call such as - 1 2 in `macro(1, 2) - """ - - stream.skip_while(WHITESPACE) - - token = stream.pop() - if token.kind != LPAR: - raise LocationException.warning("Bad `define argument list", define_token.location) - token = stream.pop() - value = [] - values = [] - - bracket_count = 0 - brace_count = 0 - par_count = 0 - - while not (token.kind == RPAR and par_count == 0): - if token.kind is LBRACKET: - bracket_count += 1 - elif 
token.kind is RBRACKET: - bracket_count += -1 - elif token.kind is LBRACE: - brace_count += 1 - elif token.kind is RBRACE: - brace_count += -1 - elif token.kind is LPAR: - par_count += 1 - elif token.kind is RPAR: - par_count += -1 - - value_ok = token.kind == COMMA and bracket_count == 0 and brace_count == 0 and par_count == 0 - - if value_ok: - values.append(value) - value = [] - else: - value.append(token) - token = stream.pop() - - values.append(value) - return values diff --git a/vunit/parsing/verilog/tokenizer.py b/vunit/parsing/verilog/tokenizer.py index 95c2b68df..06d42edad 100644 --- a/vunit/parsing/verilog/tokenizer.py +++ b/vunit/parsing/verilog/tokenizer.py @@ -12,120 +12,101 @@ Verilog preprocessing """ -from vunit.parsing.tokenizer import Tokenizer, Token -from vunit.parsing.verilog.tokens import ( - COLON, - COMMA, - COMMENT, - EQUAL, - ESCAPED_NEWLINE, - DOUBLE_COLON, - HASH, - IDENTIFIER, - KEYWORDS, - LBRACE, - LBRACKET, - LPAR, - MULTI_COMMENT, - NEWLINE, - OTHER, - PREPROCESSOR, - RBRACE, - RBRACKET, - RPAR, - SEMI_COLON, - STRING, - WHITESPACE, -) - - -class VerilogTokenizer(object): +from typing import Callable, Optional +from vunit.parsing.tokenizer import Location, Tokenizer, Token +from vunit.parsing.verilog.tokens import KEYWORDS, KeywordKind, TokenKind + + +class VerilogTokenizer: """ A Verilog tokenizer """ - def __init__(self, create_locations=True): + _tokenizer: Tokenizer + _create_locations: bool + + def __init__(self, create_locations: bool = True): self._tokenizer = Tokenizer() self._create_locations = create_locations - def slice_value(token, start=None, end=None): + def slice_value(token: Token, start: Optional[int] = None, end: Optional[int] = None): return Token(token.kind, token.value[start:end], token.location) - def str_value(token): + def str_value(token: Token) -> Token: return Token( token.kind, token.value[1:-1].replace("\\\n", "").replace('\\"', '"'), token.location, ) - def remove_value(token): + def remove_value(token: Token) -> Token: return Token(token.kind, "", token.location) - def ignore_value(token): # pylint: disable=unused-argument + def ignore_value(_: Token) -> None: # pylint: disable=unused-argument pass - def add(kind, regex, func=None): + def add(kind: TokenKind, regex: str, func: Optional[Callable[[Token], Optional[Token]]] = None): self._tokenizer.add(kind, regex, func) - def replace_keywords(token): # pylint: disable=missing-docstring + def replace_keywords(token: Token) -> Token: # pylint: disable=missing-docstring if token.value in KEYWORDS: - return Token(KEYWORDS[token.value], token.value, token.location) + return Token(KeywordKind(token.value), token.value, token.location) return token add( - PREPROCESSOR, + TokenKind.PREPROCESSOR, r"`[a-zA-Z][a-zA-Z0-9_]*", lambda token: slice_value(token, start=1), ) - add(STRING, r'(?
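
Note (not part of the patch): the hunks above replace the callable token-kind singletons with a frozen Token dataclass keyed by TokenKind/KeywordKind enum members and introduce the Defines, IncludePaths and IncludedFiles aliases. The following is a minimal usage sketch of that typed API, assuming those names behave as shown in the hunks; the Verilog source string and the identifier filter are illustrative only.

from vunit.parsing.verilog.tokenizer import VerilogTokenizer
from vunit.parsing.verilog.preprocess import (
    Defines,
    IncludePaths,
    IncludedFiles,
    VerilogPreprocessor,
)
from vunit.parsing.verilog.tokens import TokenKind

# Illustrative sketch only, not part of the patch.
tokenizer = VerilogTokenizer()
tokens = tokenizer.tokenize("`define WIDTH 8\nmodule m; endmodule\n", file_name="m.sv")

# The aliases make the preprocessor's mutable in/out arguments explicit.
defines: Defines = {}
include_paths: IncludePaths = []
included_files: IncludedFiles = []
preprocessed = VerilogPreprocessor(tokenizer).preprocess(tokens, defines, include_paths, included_files)

# Token kinds are compared against enum members instead of module-level singleton objects.
identifiers = [tok.value for tok in preprocessed if tok.kind is TokenKind.IDENTIFIER]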