Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 106 additions & 59 deletions cxxheaderparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import inspect
import re
import typing
from dataclasses import dataclass

from . import lexer
from .errors import CxxParseError
Expand Down Expand Up @@ -63,11 +64,26 @@
from .visitor import CxxVisitor, null_visitor

LexTokenList = typing.List[LexToken]
T = typing.TypeVar("T")

PT = typing.TypeVar("PT", Parameter, TemplateNonTypeParam)


@dataclass
class Operator:
    """
    Internal structure used by the parser to carry the pieces of an
    operator that has been parsed.
    """

    #: Possibly qualified name of the operator
    pqname: PQName

    #: Name of the operator. For a conversion operator this is always the
    #: string ``conversion``
    operator_name: str

    #: Return type of the operator
    ctype: Type

    #: Modifiers parsed for the return type of the operator
    cmods: ParsedTypeModifiers


class CxxParser:
"""
Single-use parser object
Expand Down Expand Up @@ -697,7 +713,7 @@ def _parse_template_specialization(self) -> TemplateSpecialization:

try:
parsed_type, mods = self._parse_type(None)
if parsed_type is None:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_parse_type now returns a Union of types. According to the typing rules, the caller must use an `if` check to determine which type was returned (that is the reason why a function should not return a Union).

if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="")
Expand Down Expand Up @@ -1022,7 +1038,7 @@ def _parse_using_typealias(
"""

parsed_type, mods = self._parse_type(None)
if parsed_type is None:
if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="parsing typealias")
Expand Down Expand Up @@ -1571,25 +1587,22 @@ def _parse_pqname_name_operator(self) -> LexTokenList:

def _parse_pqname_name(
self, tok_value: str
) -> typing.Tuple[NameSpecifier, typing.Optional[str]]:
name = ""
specialization = None
op = None

) -> typing.Tuple[NameSpecifier, LexTokenList]:
# parse out operators as that's generally useful
if tok_value == "operator":
op_parts = self._parse_pqname_name_operator()
op = "".join(o.value for o in op_parts)
name = f"operator{op}"

name = "operator" + "".join(o.value for o in op_parts)
else:
op_parts = []
name = tok_value

if self.lex.token_if("<"):
# template specialization
specialization = self._parse_template_specialization()
else:
specialization = None

return NameSpecifier(name, specialization), op
return NameSpecifier(name, specialization), op_parts

def _parse_pqname(
self,
Expand All @@ -1598,14 +1611,19 @@ def _parse_pqname(
fn_ok: bool = False,
compound_ok: bool = False,
fund_ok: bool = False,
) -> typing.Tuple[PQName, typing.Optional[str]]:
) -> typing.Tuple[PQName, LexTokenList]:
"""
Parses a possibly qualified function name or a type name, returns when
unexpected item encountered (but does not consume it)

:param fn_ok: Operator functions ok
:param compound_ok: Compound types ok
:param fund_ok: Fundamental types ok
:return: 2-element tuple where the first element is a possibly qualified
function name, and the second element is list of Lex tokens that
are used as operator name.
If this list is empty, then no operator is detected.


qualified_id: ["::"] nested_name_specifier ["template"] unqualified_id
| "::" IDENTIFIER
Expand Down Expand Up @@ -1640,7 +1658,7 @@ def _parse_pqname(

classkey = None
segments: typing.List[PQNameSegment] = []
op = None
op_parts: LexTokenList = []
has_typename = False

if tok is None:
Expand All @@ -1650,7 +1668,7 @@ def _parse_pqname(
raise self._parse_error(tok)

if tok.type == "auto":
return PQName([AutoSpecifier()]), None
return PQName([AutoSpecifier()]), []

_fundamentals = self._fundamentals

Expand All @@ -1675,7 +1693,7 @@ def _parse_pqname(
# Handle unnamed class/enum/struct
self.anon_id += 1
segments.append(AnonymousName(self.anon_id))
return PQName(segments, classkey), None
return PQName(segments, classkey), []
elif tok.type == "typename":
has_typename = True
tok = self.lex.token()
Expand Down Expand Up @@ -1711,9 +1729,9 @@ def _parse_pqname(
tok = self._next_token_must_be("NAME")
tok_value = tok.value

name, op = self._parse_pqname_name(tok_value)
name, op_parts = self._parse_pqname_name(tok_value)
segments.append(name)
if op:
if op_parts:
if not fn_ok:
# encountered unexpected operator
raise self._parse_error(tok, "NAME")
Expand All @@ -1732,10 +1750,10 @@ def _parse_pqname(
self.debug_print(
"parse_pqname: %s op=%s",
pqname,
op,
op_parts,
)

return pqname, op
return pqname, op_parts

#
# Function parsing
Expand All @@ -1758,7 +1776,7 @@ def _parse_parameter(
param_name = None
default = None
param_pack = False
parsed_type: typing.Optional[Type]
parsed_type: typing.Union[Type, Operator]
at_type: typing.Optional[Type] = None

if not tok:
Expand All @@ -1770,7 +1788,7 @@ def _parse_parameter(
else:
# required typename + decorators
parsed_type, mods = self._parse_type(tok)
if parsed_type is None:
if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="parsing parameter")
Expand Down Expand Up @@ -1883,7 +1901,7 @@ def _parse_trailing_return_type(
)

parsed_type, mods = self._parse_type(None)
if parsed_type is None:
if not isinstance(parsed_type, Type):
raise self._parse_error(None)

mods.validate(var_ok=False, meth_ok=False, msg="parsing trailing return type")
Expand Down Expand Up @@ -2301,7 +2319,7 @@ def _parse_type(
self,
tok: typing.Optional[LexToken],
operator_ok: bool = False,
) -> typing.Tuple[typing.Optional[Type], ParsedTypeModifiers]:
) -> typing.Tuple[typing.Union[Type, Operator], ParsedTypeModifiers]:
"""
This parses a typename and stops parsing when it hits something
that it doesn't understand. The caller uses the results to figure
Expand All @@ -2310,7 +2328,7 @@ def _parse_type(
This only parses the base type, does not parse pointers, references,
or additional const/volatile qualifiers

The returned type will only be None if operator_ok is True and an
The returned type will only be `Operator` if operator_ok is True and an
operator is encountered.
"""

Expand All @@ -2331,8 +2349,6 @@ def _parse_type(
tok = get_token()

pqname: typing.Optional[PQName] = None
pqname_optional = False
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is no longer used


_pqname_start_tokens = self._pqname_start_tokens
_attribute_start = self._attribute_start_tokens

Expand All @@ -2343,13 +2359,25 @@ def _parse_type(
if pqname is not None:
# found second set of names, done here
break

if operator_ok and tok_type == "operator":
# special case: conversion operators such as operator bool
pqname_optional = True
break
pqname, _ = self._parse_pqname(
tok, compound_ok=True, fn_ok=False, fund_ok=True
mods = ParsedTypeModifiers(vars, both, meths)
po = self._parse_member_operator()
return po, mods
Comment on lines +2365 to +2367
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we parse the old cases plus some additional parsing that used to be in _parse_operator_conversion. I extracted that into a new function to make the code a bit simpler.


pqname, op_parts = self._parse_pqname(
tok, compound_ok=True, fn_ok=True, fund_ok=True
)

if op_parts:
# special case: conversion operator, but also a free operator
mods = ParsedTypeModifiers(vars, both, meths)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only parsed when we have detected the operator as a free operator.

po = self._parse_free_operator(
pqname, op_parts, mods, const, volatile
)
return po, mods

elif tok_type in self._parse_type_ptr_ref_paren:
if pqname is None:
raise self._parse_error(tok)
Expand All @@ -2375,19 +2403,45 @@ def _parse_type(
tok = get_token()

if pqname is None:
if not pqname_optional:
raise self._parse_error(tok)
parsed_type = None
else:
# Construct a type from the parsed name
parsed_type = Type(pqname, const, volatile)
raise self._parse_error(tok)

# Construct a type from the parsed name
parsed_type = Type(pqname, const, volatile)

self.lex.return_token(tok)

# Always return the modifiers
mods = ParsedTypeModifiers(vars, both, meths)
return parsed_type, mods

def _parse_member_operator(self) -> Operator:
    """
    Parses an operator found in a class body.

    The type is read with ``_parse_type``; the result is wrapped in an
    ``Operator`` whose pqname is the bare ``operator`` name and whose
    operator name is ``conversion``.

    A parse error is raised when ``_parse_type`` does not yield a ``Type``.
    """
    conv_type, conv_mods = self._parse_type(None)
    if isinstance(conv_type, Type):
        name = PQName([NameSpecifier("operator")])
        return Operator(name, "conversion", conv_type, conv_mods)

    raise self._parse_error(None)

def _parse_free_operator(
self,
pqname: PQName,
op_parts: LexTokenList,
mods: ParsedTypeModifiers,
const: bool,
volatile: bool,
) -> Operator:
"""This function parses operator implemented outside class body."""
last_seg = pqname.segments[-1]
assert isinstance(last_seg, NameSpecifier)
assert last_seg.name.startswith("operator")
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit strange to me: why doesn't NameSpecifier use a more complex type instead of concatenating the operator name? Anyway, we do not care about this name here. We take the real operator name from the op variable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The operator name gets concatenated in _parse_pqname_name... I think it would be better for that function to not concatenate and let the eventual user of the operator make that call.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I changed _parse_pqname_name to not concatenate this. This caused an even larger change for something that seems like it should be rather simple, because I changed the return types of several functions from str|None to list[LexToken].

last_seg.name = "operator"

type_name = PQName(
[NameSpecifier(op.value) for op in op_parts if op.type != "DBL_COLON"]
)
t = Type(type_name, const, volatile)
return Operator(pqname, "conversion", t, mods)

def _parse_decl(
self,
parsed_type: Type,
Expand Down Expand Up @@ -2477,7 +2531,9 @@ def _parse_decl(

tok = self.lex.token_if_in_set(self._pqname_start_tokens)
if tok:
pqname, op = self._parse_pqname(tok, fn_ok=True)
pqname, op_parts = self._parse_pqname(tok, fn_ok=True)
if op_parts:
op = "".join(o.value for o in op_parts)

# TODO: "type fn(x);" is ambiguous here. Because this is a header
# parser, we assume it's a function, not a variable declaration
Expand Down Expand Up @@ -2538,41 +2594,32 @@ def _parse_decl(

def _parse_operator_conversion(
self,
operator: Operator,
mods: ParsedTypeModifiers,
location: Location,
doxygen: typing.Optional[str],
template: TemplateDeclTypeVar,
is_typedef: bool,
is_friend: bool,
) -> None:
tok = self._next_token_must_be("operator")

Comment on lines -2548 to -2549
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The token tok has already been processed in _parse_type.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, perhaps a more appropriate name for this function would be _parse_type_or_operator ?

if is_typedef:
raise self._parse_error(tok, "operator not permitted in typedef")

# next piece must be the conversion type
ctype, cmods = self._parse_type(None)
if ctype is None:
raise self._parse_error(None)
raise self._parse_error(None, "operator not permitted in typedef")

cmods.validate(var_ok=False, meth_ok=False, msg="parsing conversion operator")
operator.cmods.validate(
var_ok=False, meth_ok=False, msg="parsing conversion operator"
)

# Check for any cv decorations for the type
rtype = self._parse_cv_ptr(ctype)
rtype = self._parse_cv_ptr(operator.ctype)

# then this must be a method
self._next_token_must_be("(")

# make our own pqname/op here
segments: typing.List[PQNameSegment] = [NameSpecifier("operator")]
pqname = PQName(segments)
op = "conversion"

if self._parse_function(
mods,
rtype,
pqname,
op,
operator.pqname,
operator.operator_name,
template,
doxygen,
location,
Expand Down Expand Up @@ -2612,7 +2659,7 @@ def _parse_declarations(

# Check to see if this might be a class/enum declaration
if (
parsed_type is not None
isinstance(parsed_type, Type)
and parsed_type.typename.classkey
and self._maybe_parse_class_enum_decl(
parsed_type, mods, doxygen, template, is_typedef, is_friend, location
Expand All @@ -2635,10 +2682,10 @@ def _parse_declarations(

mods.validate(var_ok=var_ok, meth_ok=meth_ok, msg=msg)

if parsed_type is None:
if isinstance(parsed_type, Operator):
# this means an operator was encountered, deal with the special case
self._parse_operator_conversion(
mods, location, doxygen, template, is_typedef, is_friend
parsed_type, mods, location, doxygen, template, is_typedef, is_friend
)
return

Expand Down
Loading