Skip to content

Commit a110a55

Browse files
authored
Merge pull request #63 from robotpy/pp-encoding
Variety of preprocessor related fixes
2 parents acc2b27 + a13cdf4 commit a110a55

File tree

6 files changed

+133
-20
lines changed

6 files changed

+133
-20
lines changed

.github/workflows/dist.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ jobs:
8181
run: python setup.py bdist_wheel
8282

8383
- name: Install test dependencies
84-
run: python -m pip --disable-pip-version-check install pytest
84+
run: python -m pip --disable-pip-version-check install pytest pcpp
8585

8686
- name: Test wheel
8787
shell: bash

cxxheaderparser/dump.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,20 @@ def dumpmain() -> None:
2626
parser.add_argument(
2727
"--pcpp", default=False, action="store_true", help="Use pcpp preprocessor"
2828
)
29+
parser.add_argument(
30+
"--encoding", default=None, help="Use this encoding to open the file"
31+
)
2932

3033
args = parser.parse_args()
3134

3235
preprocessor = None
3336
if args.pcpp:
3437
from .preprocessor import make_pcpp_preprocessor
3538

36-
preprocessor = make_pcpp_preprocessor()
39+
preprocessor = make_pcpp_preprocessor(encoding=args.encoding)
3740

3841
options = ParserOptions(verbose=args.verbose, preprocessor=preprocessor)
39-
data = parse_file(args.header, options=options)
42+
data = parse_file(args.header, encoding=args.encoding, options=options)
4043

4144
if args.mode == "pprint":
4245
ddata = dataclasses.asdict(data)

cxxheaderparser/gentest.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import typing
77

88
from .errors import CxxParseError
9+
from .preprocessor import make_pcpp_preprocessor
910
from .options import ParserOptions
1011
from .simple import parse_string, ParsedData
1112

@@ -49,14 +50,23 @@ def _inner_repr(o: typing.Any) -> str:
4950
return _inner_repr(data)
5051

5152

52-
def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) -> None:
53+
def gentest(
54+
infile: str, name: str, outfile: str, verbose: bool, fail: bool, pcpp: bool
55+
) -> None:
5356
# Goal is to allow making a unit test as easy as running this dumper
5457
# on a file and copy/pasting this into a test
5558

5659
with open(infile, "r") as fp:
5760
content = fp.read()
5861

62+
maybe_options = ""
63+
popt = ""
64+
5965
options = ParserOptions(verbose=verbose)
66+
# Only wire in the pcpp preprocessor when the caller asked for it via the
# ``pcpp`` argument. The previous condition tested ``options``, the
# ParserOptions object created just above, which does not reflect the
# caller's choice; ``pcpp`` itself was never consulted.
if pcpp:
    options.preprocessor = make_pcpp_preprocessor()
    # Text spliced into the generated test so that it parses the content
    # with the same preprocessor that was used here.
    maybe_options = "options = ParserOptions(preprocessor=make_pcpp_preprocessor())"
    popt = ", options=options"
6070

6171
try:
6272
data = parse_string(content, options=options)
@@ -74,15 +84,17 @@ def gentest(infile: str, name: str, outfile: str, verbose: bool, fail: bool) ->
7484
if not fail:
7585
stmt = nondefault_repr(data)
7686
stmt = f"""
77-
data = parse_string(content, cleandoc=True)
87+
{maybe_options}
88+
data = parse_string(content, cleandoc=True{popt})
7889
7990
assert data == {stmt}
8091
"""
8192
else:
8293
stmt = f"""
94+
{maybe_options}
8395
err = {repr(err)}
8496
with pytest.raises(CxxParseError, match=re.escape(err)):
85-
parse_string(content, cleandoc=True)
97+
parse_string(content, cleandoc=True{popt})
8698
"""
8799

88100
content = ("\n" + content.strip()).replace("\n", "\n ")
@@ -113,11 +125,12 @@ def test_{name}() -> None:
113125
parser = argparse.ArgumentParser()
114126
parser.add_argument("header")
115127
parser.add_argument("name", nargs="?", default="TODO")
128+
parser.add_argument("--pcpp", default=False, action="store_true")
116129
parser.add_argument("-v", "--verbose", default=False, action="store_true")
117130
parser.add_argument("-o", "--output", default="-")
118131
parser.add_argument(
119132
"-x", "--fail", default=False, action="store_true", help="Expect failure"
120133
)
121134
args = parser.parse_args()
122135

123-
gentest(args.header, args.name, args.output, args.verbose, args.fail)
136+
gentest(args.header, args.name, args.output, args.verbose, args.fail, args.pcpp)

cxxheaderparser/preprocessor.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import io
6+
import os
67
from os.path import relpath
78
import typing
89
from .options import PreprocessorFunction
@@ -15,9 +16,10 @@ class PreprocessorError(Exception):
1516

1617

1718
class _CustomPreprocessor(Preprocessor):
18-
def __init__(self):
19+
def __init__(self, encoding: typing.Optional[str]):
1920
Preprocessor.__init__(self)
20-
self.errors = []
21+
self.errors: typing.List[str] = []
22+
self.assume_encoding = encoding
2123

2224
def on_error(self, file, line, msg):
2325
self.errors.append(f"{file}:{line} error: {msg}")
@@ -34,21 +36,15 @@ def _filter_self(fname: str, fp: typing.TextIO) -> str:
3436
# isn't what a typical user of cxxheaderparser would want, so we strip out
3537
# the line directives and any content that isn't in our original file
3638

37-
# Compute the filename to match based on how pcpp does it
38-
try:
39-
relfname = relpath(fname)
40-
except Exception:
41-
relfname = fname
42-
relfname = relfname.replace("\\", "/")
43-
44-
relfname += '"\n'
39+
# pcpp always emits line directives that match whatever is passed in to it
40+
line_ending = f'{fname}"\n'
4541

4642
new_output = io.StringIO()
4743
keep = True
4844

4945
for line in fp:
5046
if line.startswith("#line"):
51-
keep = line.endswith(relfname)
47+
keep = line.endswith(line_ending)
5248

5349
if keep:
5450
new_output.write(line)
@@ -62,11 +58,14 @@ def make_pcpp_preprocessor(
6258
defines: typing.List[str] = [],
6359
include_paths: typing.List[str] = [],
6460
retain_all_content: bool = False,
61+
encoding: typing.Optional[str] = None,
6562
) -> PreprocessorFunction:
6663
"""
6764
Creates a preprocessor function that uses pcpp (which must be installed
6865
separately) to preprocess the input text.
6966
67+
:param encoding: If specified any include files are opened with this encoding
68+
7069
.. code-block:: python
7170
7271
pp = make_pcpp_preprocessor()
@@ -77,7 +76,7 @@ def make_pcpp_preprocessor(
7776
"""
7877

7978
def _preprocess_file(filename: str, content: str) -> str:
80-
pp = _CustomPreprocessor()
79+
pp = _CustomPreprocessor(encoding)
8180
if include_paths:
8281
for p in include_paths:
8382
pp.add_path(p)

cxxheaderparser/simple.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
2525
"""
2626

27+
import os
2728
import sys
2829
import inspect
2930
import typing
@@ -344,14 +345,15 @@ def parse_string(
344345

345346

346347
def parse_file(
347-
filename: str,
348+
filename: typing.Union[str, os.PathLike],
348349
encoding: typing.Optional[str] = None,
349350
*,
350351
options: typing.Optional[ParserOptions] = None,
351352
) -> ParsedData:
352353
"""
353354
Simple function to parse a header from a file and return a data structure
354355
"""
356+
filename = os.fsdecode(filename)
355357

356358
if encoding is None:
357359
encoding = "utf-8-sig"

tests/test_preprocessor.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import pathlib
2+
3+
from cxxheaderparser.options import ParserOptions
4+
from cxxheaderparser.preprocessor import make_pcpp_preprocessor
5+
from cxxheaderparser.simple import NamespaceScope, ParsedData, parse_file, parse_string
6+
from cxxheaderparser.types import (
7+
FundamentalSpecifier,
8+
NameSpecifier,
9+
PQName,
10+
Token,
11+
Type,
12+
Value,
13+
Variable,
14+
)
15+
16+
17+
def test_basic_preprocessor() -> None:
    """A macro defined in the input is expanded before the text is parsed."""
    content = """
      #define X 1
      int x = X;
    """

    # ``x`` must be initialised with the macro's expansion ("1"), not "X".
    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    x_variable = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="1")]),
    )
    expected = ParsedData(namespace=NamespaceScope(variables=[x_variable]))

    pcpp_options = ParserOptions(preprocessor=make_pcpp_preprocessor())
    data = parse_string(content, cleandoc=True, options=pcpp_options)

    assert data == expected
38+
39+
40+
def test_preprocessor_omit_content(tmp_path: pathlib.Path) -> None:
    """Ensure that content in other headers is omitted"""
    # t1.h includes t2.h; t2.h supplies the macro X plus a declaration
    # ("omitted") that must not show up in the parsed output.
    main_header = tmp_path / "t1.h"
    header_text = {
        main_header: '#include "t2.h"\nint x = X;\n',
        tmp_path / "t2.h": "#define X 2\nint omitted = 1;\n",
    }
    for header_path, text in header_text.items():
        with open(header_path, "w") as fp:
            fp.write(text)

    # Only ``x`` is expected, carrying the value of X taken from t2.h.
    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    x_variable = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="2")]),
    )
    expected = ParsedData(namespace=NamespaceScope(variables=[x_variable]))

    pcpp_options = ParserOptions(preprocessor=make_pcpp_preprocessor())
    data = parse_file(main_header, options=pcpp_options)

    assert data == expected
67+
68+
69+
def test_preprocessor_encoding(tmp_path: pathlib.Path) -> None:
    """Ensure we can handle alternate encodings"""
    # Both headers start with a comment containing the raw byte 0xA9, so the
    # files are written in binary mode and read back by passing "cp1252" to
    # both the preprocessor factory and parse_file.
    main_header = tmp_path / "t1.h"
    header_bytes = {
        main_header: b'// \xa9 2023 someone\n#include "t2.h"\nint x = X;\n',
        tmp_path / "t2.h": b"// \xa9 2023 someone\n#define X 3\nint omitted = 1;\n",
    }
    for header_path, raw in header_bytes.items():
        with open(header_path, "wb") as fp:
            fp.write(raw)

    # Only ``x`` is expected, carrying the value of X taken from t2.h.
    int_type = Type(typename=PQName(segments=[FundamentalSpecifier(name="int")]))
    x_variable = Variable(
        name=PQName(segments=[NameSpecifier(name="x")]),
        type=int_type,
        value=Value(tokens=[Token(value="3")]),
    )
    expected = ParsedData(namespace=NamespaceScope(variables=[x_variable]))

    pcpp_options = ParserOptions(preprocessor=make_pcpp_preprocessor(encoding="cp1252"))
    data = parse_file(main_header, options=pcpp_options, encoding="cp1252")

    assert data == expected

0 commit comments

Comments
 (0)