Skip to content

Commit a079ee2

Browse files
committed
Improve HDLReader and its test
1 parent 3dd16e7 commit a079ee2

File tree

2 files changed

+115
-24
lines changed

2 files changed

+115
-24
lines changed

hdltools/hdl_reader.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#
2-
# Copyright (C) 2025 HDLtools Project
2+
# Copyright (C) 2025-2026 HDLtools Project
33
#
44
# SPDX-License-Identifier: GPL-3.0-or-later
55
#
66

77
"""
8-
Reads and sanitizes the input HDL code by removing comments, extra whitespaces
9-
and newlines.
8+
Reads and sanitizes the input HDL code by removing comments, trailing spaces
9+
and multiple empty lines.
1010
"""
1111

1212
import re
@@ -27,15 +27,35 @@ def set_code(self, code):
2727
"""Directly sets the HDL code."""
2828
self.code = code
2929

30-
def is_vhdl(self):
31-
"""Return True if the code seems to be VHDL."""
32-
return 'endmodule' not in self.code.lower()
33-
3430
def get_code(self):
3531
"""Retrieves the sanitized HDL code."""
36-
if self.is_vhdl():
37-
text = re.sub(r'--[^\n]*', '', self.code)
32+
text = self.code
33+
text = re.sub(r'/\*.*?\*/', '', text, flags=re.DOTALL)
34+
if self._is_vhdl(text):
35+
text = re.sub(r'--.*', '', text)
36+
text = re.sub(
37+
r'\battribute\b.*?;', '', text,
38+
flags=re.IGNORECASE | re.MULTILINE
39+
)
3840
else:
39-
text = re.sub(r'//[^\n]*', '', self.code)
40-
text = re.sub(r'/\*.*?\*/', '', text, flags=re.DOTALL)
41-
return re.sub(r'\s+', ' ', text).strip()
41+
text = re.sub(r'//.*', '', text)
42+
text = re.sub(r'\(\*.*?\*\)', '', text, flags=re.DOTALL)
43+
text = re.sub(r'[ \t]+$', '', text, flags=re.MULTILINE)
44+
text = re.sub(r'\n{3,}', '\n\n', text)
45+
return text.strip()
46+
47+
def _is_vhdl(self, text):
48+
"""Heuristic to determine if the code is VHDL or Verilog."""
49+
vhdl_score = 0
50+
vlog_score = 0
51+
52+
vhdl_keywords = r'\b(library|entity|architecture|signal|begin)\b'
53+
vlog_keywords = r'\b(endmodule|assign|logic|wire|reg|parameter)\b'
54+
55+
vhdl_matches = re.findall(vhdl_keywords, text, re.IGNORECASE)
56+
vhdl_score += len(set(vhdl_matches))
57+
58+
vlog_matches = re.findall(vlog_keywords, text)
59+
vlog_score += len(set(vlog_matches))
60+
61+
return vhdl_score > vlog_score

tests/test_reader.py

Lines changed: 83 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,91 @@
33
from pathlib import Path
44
from hdltools.hdl_reader import HDLReader
55

6+
INPUT = {}
7+
OUTPUT = {}
68

7-
@pytest.fixture
8-
def vcode():
9-
vfile = Path(__file__).parent.resolve() / 'hdl' / 'modules.sv'
10-
vobj = HDLReader()
11-
vobj.read_file(vfile)
12-
return vobj.get_code()
9+
INPUT['vhdl'] = '''
10+
/*
11+
Block comments (added in VHDL 2008)
12+
*/
13+
library IEEE; -- aaa
14+
use IEEE.STD_LOGIC_1164.ALL; -- bbb
15+
-- ccc
16+
entity dut is
17+
port (
18+
data_i : in std_logic; -- input
19+
data_o : out std_logic -- output
20+
);
21+
end dut;
22+
-- ddd
23+
-- eee
24+
architecture behav of dut /* fff */is
25+
begin
26+
attribute keep : string;
27+
/* ggg -- */
28+
attribute keep of data_o : signal is "true";
29+
data_o <= data_i;
30+
/*
31+
hhh
32+
-- */
33+
end behav;
34+
'''
35+
36+
OUTPUT['vhdl'] = '''
37+
library IEEE;
38+
use IEEE.STD_LOGIC_1164.ALL;
39+
40+
entity dut is
41+
port (
42+
data_i : in std_logic;
43+
data_o : out std_logic
44+
);
45+
end dut;
46+
47+
architecture behav of dut is
48+
begin
49+
50+
data_o <= data_i;
1351
52+
end behav;
53+
'''
1454

15-
def test_comments(vcode):
16-
comment_patterns = ['//', '/*', '*/', '--']
17-
assert not any(pattern in vcode for pattern in comment_patterns)
55+
INPUT['vlog'] = '''
56+
/*
57+
Block comments
58+
*/
59+
// aaa
60+
(* black_box *)module dut (
61+
input data_i, // input
62+
output data_o // output
63+
);
64+
// bbb
65+
// ccc
66+
/* ddd // */
67+
assign /* eee // */data_o = data_i;
68+
/*
69+
fff
70+
// */
71+
endmodule
72+
'''
1873

74+
OUTPUT['vlog'] = '''
75+
module dut (
76+
input data_i,
77+
output data_o
78+
);
1979
20-
def test_spaces(vcode):
21-
assert '\n' not in vcode
22-
assert ' ' not in vcode
80+
assign data_o = data_i;
81+
82+
endmodule
83+
'''
84+
85+
86+
@pytest.mark.parametrize("input_code, expected_output", [
87+
(INPUT['vhdl'], OUTPUT['vhdl']),
88+
(INPUT['vlog'], OUTPUT['vlog']),
89+
], ids=["VHDL", "Verilog"])
90+
def test_remove_comments(input_code, expected_output):
91+
vobj = HDLReader()
92+
vobj.set_code(input_code)
93+
assert vobj.get_code() == expected_output.strip()

0 commit comments

Comments
 (0)