diff --git a/pip_api/_installed_distributions.py b/pip_api/_installed_distributions.py index 92bd58a..f4dc5c2 100644 --- a/pip_api/_installed_distributions.py +++ b/pip_api/_installed_distributions.py @@ -7,7 +7,7 @@ from pip_api._call import call from pip_api.exceptions import PipError -from pip_api._vendor.packaging.version import parse # type: ignore +from pip_api._vendor.packaging_legacy.version import parse # type: ignore class Distribution: diff --git a/pip_api/_vendor/packaging/__about__.py b/pip_api/_vendor/packaging/__about__.py deleted file mode 100644 index e70d692..0000000 --- a/pip_api/_vendor/packaging/__about__.py +++ /dev/null @@ -1,26 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. - -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] - -__title__ = "packaging" -__summary__ = "Core utilities for Python packages" -__uri__ = "https://github.com/pypa/packaging" - -__version__ = "21.0" - -__author__ = "Donald Stufft and individual contributors" -__email__ = "donald@stufft.io" - -__license__ = "BSD-2-Clause or Apache-2.0" -__copyright__ = "2014-2019 %s" % __author__ diff --git a/pip_api/_vendor/packaging/__init__.py b/pip_api/_vendor/packaging/__init__.py index 3c50c5d..22809cf 100644 --- a/pip_api/_vendor/packaging/__init__.py +++ b/pip_api/_vendor/packaging/__init__.py @@ -2,24 +2,14 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from .__about__ import ( - __author__, - __copyright__, - __email__, - __license__, - __summary__, - __title__, - __uri__, - __version__, -) +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] +__version__ = "23.2" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014 %s" % __author__ diff --git a/pip_api/_vendor/packaging/_elffile.py b/pip_api/_vendor/packaging/_elffile.py new file mode 100644 index 0000000..6fb19b3 --- /dev/null +++ b/pip_api/_vendor/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +import enum +import os +import struct +from typing import IO, Optional, Tuple + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error: + raise ELFInvalid("unable to parse identification") + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianness). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or " + f"encoding ({self.encoding})" + ) + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> Optional[str]: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. + continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/pip_api/_vendor/packaging/_manylinux.py b/pip_api/_vendor/packaging/_manylinux.py index b0ed657..3705d50 100644 --- a/pip_api/_vendor/packaging/_manylinux.py +++ b/pip_api/_vendor/packaging/_manylinux.py @@ -1,122 +1,62 @@ import collections +import contextlib import functools import os import re -import struct import sys import warnings -from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple - - -# Python does not provide platform information at sufficient granularity to -# identify the architecture of the running executable in some cases, so we -# determine it dynamically by reading the information from the running -# process. This only applies on Linux, which uses the ELF format. -class _ELFFileHeader: - # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header - class _InvalidELFFileHeader(ValueError): - """ - An invalid ELF file header was found. - """ - - ELF_MAGIC_NUMBER = 0x7F454C46 - ELFCLASS32 = 1 - ELFCLASS64 = 2 - ELFDATA2LSB = 1 - ELFDATA2MSB = 2 - EM_386 = 3 - EM_S390 = 22 - EM_ARM = 40 - EM_X86_64 = 62 - EF_ARM_ABIMASK = 0xFF000000 - EF_ARM_ABI_VER5 = 0x05000000 - EF_ARM_ABI_FLOAT_HARD = 0x00000400 - - def __init__(self, file: IO[bytes]) -> None: - def unpack(fmt: str) -> int: - try: - data = file.read(struct.calcsize(fmt)) - result: Tuple[int, ...] = struct.unpack(fmt, data) - except struct.error: - raise _ELFFileHeader._InvalidELFFileHeader() - return result[0] - - self.e_ident_magic = unpack(">I") - if self.e_ident_magic != self.ELF_MAGIC_NUMBER: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_class = unpack("B") - if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_data = unpack("B") - if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_version = unpack("B") - self.e_ident_osabi = unpack("B") - self.e_ident_abiversion = unpack("B") - self.e_ident_pad = file.read(7) - format_h = "H" - format_i = "I" - format_q = "Q" - format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q - self.e_type = unpack(format_h) - self.e_machine = unpack(format_h) - self.e_version = unpack(format_i) - self.e_entry = unpack(format_p) - self.e_phoff = unpack(format_p) - self.e_shoff = unpack(format_p) - self.e_flags = unpack(format_i) - self.e_ehsize = unpack(format_h) - self.e_phentsize = unpack(format_h) - self.e_phnum = unpack(format_h) - self.e_shentsize = unpack(format_h) - self.e_shnum = unpack(format_h) - self.e_shstrndx = unpack(format_h) - - -def _get_elf_header() -> Optional[_ELFFileHeader]: +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Sequence, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: try: - with open(sys.executable, "rb") as f: - elf_header = _ELFFileHeader(f) - except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): - return None - return elf_header + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None -def _is_linux_armhf() -> bool: +def _is_linux_armhf(executable: str) -> bool: # hard-float ABI can be detected from the ELF header of the running # process # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_ARM - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABIMASK - ) == elf_header.EF_ARM_ABI_VER5 - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD - ) == elf_header.EF_ARM_ABI_FLOAT_HARD - return result - - -def _is_linux_i686() -> bool: - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_386 - return result + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) -def _have_compatible_abi(arch: str) -> bool: - if arch == "armv7l": - return _is_linux_armhf() - if arch == "i686": - return _is_linux_i686() - return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} +def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool: + if "armv7l" in archs: + return _is_linux_armhf(executable) + if "i686" in archs: + return _is_linux_i686(executable) + allowed_archs = {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x", "loongarch64"} + return any(arch in allowed_archs for arch in archs) # If glibc ever changes its major version, we need to know what the last @@ -141,10 +81,10 @@ def _glibc_version_string_confstr() -> Optional[str]: # platform module. # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 try: - # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". - version_string = os.confstr("CS_GNU_LIBC_VERSION") + # Should be a string like "glibc 2.17". + version_string: str = getattr(os, "confstr")("CS_GNU_LIBC_VERSION") assert version_string is not None - _, version = version_string.split() + _, version = version_string.rsplit() except (AssertionError, AttributeError, OSError, ValueError): # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... return None @@ -211,8 +151,8 @@ def _parse_glibc_version(version_str: str) -> Tuple[int, int]: m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) if not m: warnings.warn( - "Expected glibc version with 2 components major.minor," - " got: %s" % version_str, + f"Expected glibc version with 2 components major.minor," + f" got: {version_str}", RuntimeWarning, ) return -1, -1 @@ -228,13 +168,13 @@ def _get_glibc_version() -> Tuple[int, int]: # From PEP 513, PEP 600 -def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: +def _is_compatible(arch: str, version: _GLibCVersion) -> bool: sys_glibc = _get_glibc_version() if sys_glibc < version: return False # Check for presence of _manylinux module. try: - import _manylinux # type: ignore # noqa + import _manylinux # noqa except ImportError: return True if hasattr(_manylinux, "manylinux_compatible"): @@ -264,12 +204,22 @@ def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: } -def platform_tags(linux: str, arch: str) -> Iterator[str]: - if not _have_compatible_abi(arch): +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate manylinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be manylinux-compatible. + + :returns: An iterator of compatible manylinux tags. + """ + if not _have_compatible_abi(sys.executable, archs): return # Oldest glibc to be supported regardless of architecture is (2, 17). too_old_glibc2 = _GLibCVersion(2, 16) - if arch in {"x86_64", "i686"}: + if set(archs) & {"x86_64", "i686"}: # On x86/i686 also oldest glibc to be supported is (2, 5). too_old_glibc2 = _GLibCVersion(2, 4) current_glibc = _GLibCVersion(*_get_glibc_version()) @@ -283,19 +233,20 @@ def platform_tags(linux: str, arch: str) -> Iterator[str]: for glibc_major in range(current_glibc.major - 1, 1, -1): glibc_minor = _LAST_GLIBC_MINOR[glibc_major] glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) - for glibc_max in glibc_max_list: - if glibc_max.major == too_old_glibc2.major: - min_minor = too_old_glibc2.minor - else: - # For other glibc major versions oldest supported is (x, 0). - min_minor = -1 - for glibc_minor in range(glibc_max.minor, min_minor, -1): - glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) - tag = "manylinux_{}_{}".format(*glibc_version) - if _is_compatible(tag, arch, glibc_version): - yield linux.replace("linux", tag) - # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. - if glibc_version in _LEGACY_MANYLINUX_MAP: - legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] - if _is_compatible(legacy_tag, arch, glibc_version): - yield linux.replace("linux", legacy_tag) + for arch in archs: + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(arch, glibc_version): + yield f"{tag}_{arch}" + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(arch, glibc_version): + yield f"{legacy_tag}_{arch}" diff --git a/pip_api/_vendor/packaging/_musllinux.py b/pip_api/_vendor/packaging/_musllinux.py index 85450fa..86419df 100644 --- a/pip_api/_vendor/packaging/_musllinux.py +++ b/pip_api/_vendor/packaging/_musllinux.py @@ -4,68 +4,13 @@ linked against musl, and what musl version is used. """ -import contextlib import functools -import operator -import os import re -import struct import subprocess import sys -from typing import IO, Iterator, NamedTuple, Optional, Tuple +from typing import Iterator, NamedTuple, Optional, Sequence - -def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: - return struct.unpack(fmt, f.read(struct.calcsize(fmt))) - - -def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]: - """Detect musl libc location by parsing the Python executable. - - Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca - ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html - """ - f.seek(0) - try: - ident = _read_unpacked(f, "16B") - except struct.error: - return None - if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. - return None - f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. - - try: - # e_fmt: Format for program header. - # p_fmt: Format for section header. - # p_idx: Indexes to find p_type, p_offset, and p_filesz. - e_fmt, p_fmt, p_idx = { - 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. - 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. - }[ident[4]] - except KeyError: - return None - else: - p_get = operator.itemgetter(*p_idx) - - # Find the interpreter section and return its content. - try: - _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) - except struct.error: - return None - for i in range(e_phnum + 1): - f.seek(e_phoff + e_phentsize * i) - try: - p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) - except struct.error: - return None - if p_type != 3: # Not PT_INTERP. - continue - f.seek(p_offset) - interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") - if "musl" not in interpreter: - return None - return interpreter - return None +from ._elffile import ELFFile class _MuslVersion(NamedTuple): @@ -95,32 +40,34 @@ def _get_musl_version(executable: str) -> Optional[_MuslVersion]: Version 1.2.2 Dynamic Program Loader """ - with contextlib.ExitStack() as stack: - try: - f = stack.enter_context(open(executable, "rb")) - except IOError: - return None - ld = _parse_ld_musl_from_elf(f) - if not ld: + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: return None - proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) return _parse_musl_version(proc.stderr) -def platform_tags(arch: str) -> Iterator[str]: +def platform_tags(archs: Sequence[str]) -> Iterator[str]: """Generate musllinux tags compatible to the current platform. - :param arch: Should be the part of platform tag after the ``linux_`` - prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a - prerequisite for the current platform to be musllinux-compatible. + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. :returns: An iterator of compatible musllinux tags. """ sys_musl = _get_musl_version(sys.executable) if sys_musl is None: # Python not dynamically linked against musl. return - for minor in range(sys_musl.minor, -1, -1): - yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" if __name__ == "__main__": # pragma: no cover diff --git a/pip_api/_vendor/packaging/_parser.py b/pip_api/_vendor/packaging/_parser.py new file mode 100644 index 0000000..4576981 --- /dev/null +++ b/pip_api/_vendor/packaging/_parser.py @@ -0,0 +1,359 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? + """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList: + retval = _parse_marker(tokenizer) + tokenizer.expect("END", expected="end of marker expression") + return retval + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if ( + env_var == "platform_python_implementation" + or env_var == "python_implementation" + ): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/pip_api/_vendor/packaging/_structures.py b/pip_api/_vendor/packaging/_structures.py index 9515497..90a6465 100644 --- a/pip_api/_vendor/packaging/_structures.py +++ b/pip_api/_vendor/packaging/_structures.py @@ -19,9 +19,6 @@ def __le__(self, other: object) -> bool: def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other: object) -> bool: - return not isinstance(other, self.__class__) - def __gt__(self, other: object) -> bool: return True @@ -51,9 +48,6 @@ def __le__(self, other: object) -> bool: def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other: object) -> bool: - return not isinstance(other, self.__class__) - def __gt__(self, other: object) -> bool: return False diff --git a/pip_api/_vendor/packaging/_tokenizer.py b/pip_api/_vendor/packaging/_tokenizer.py new file mode 100644 index 0000000..dd0d648 --- /dev/null +++ b/pip_api/_vendor/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/pip_api/_vendor/packaging/markers.py b/pip_api/_vendor/packaging/markers.py index 87bbab7..8b98fca 100644 --- a/pip_api/_vendor/packaging/markers.py +++ b/pip_api/_vendor/packaging/markers.py @@ -8,19 +8,17 @@ import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pip_api._vendor.pyparsing import ( # noqa: N817 - Forward, - Group, - Literal as L, - ParseException, - ParseResults, - QuotedString, - ZeroOrMore, - stringEnd, - stringStart, +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, ) - +from ._tokenizer import ParserSyntaxError from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name __all__ = [ "InvalidMarker", @@ -52,101 +50,24 @@ class UndefinedEnvironmentName(ValueError): """ -class Node: - def __init__(self, value: Any) -> None: - self.value = value - - def __str__(self) -> str: - return str(self.value) - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}('{self}')>" - - def serialize(self) -> str: - raise NotImplementedError - - -class Variable(Node): - def serialize(self) -> str: - return str(self) - - -class Value(Node): - def serialize(self) -> str: - return f'"{self}"' - - -class Op(Node): - def serialize(self) -> str: - return str(self) - - -VARIABLE = ( - L("implementation_version") - | L("platform_python_implementation") - | L("implementation_name") - | L("python_full_version") - | L("platform_release") - | L("platform_version") - | L("platform_machine") - | L("platform_system") - | L("python_version") - | L("sys_platform") - | L("os_name") - | L("os.name") # PEP-345 - | L("sys.platform") # PEP-345 - | L("platform.version") # PEP-345 - | L("platform.machine") # PEP-345 - | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # undocumented setuptools legacy - | L("extra") # PEP-508 -) -ALIASES = { - "os.name": "os_name", - "sys.platform": "sys_platform", - "platform.version": "platform_version", - "platform.machine": "platform_machine", - "platform.python_implementation": "platform_python_implementation", - "python_implementation": "platform_python_implementation", -} -VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) - -VERSION_CMP = ( - L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") -) - -MARKER_OP = VERSION_CMP | L("not in") | L("in") -MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) - -MARKER_VALUE = QuotedString("'") | QuotedString('"') -MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) - -BOOLOP = L("and") | L("or") - -MARKER_VAR = VARIABLE | MARKER_VALUE - -MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) -MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) - -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() - -MARKER_EXPR = Forward() -MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) -MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) - -MARKER = stringStart + MARKER_EXPR + stringEnd - - -def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: - if isinstance(results, ParseResults): - return [_coerce_parse_result(i) for i in results] - else: - return results +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results def _format_marker( - marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True ) -> str: assert isinstance(marker, (list, tuple, str)) @@ -192,7 +113,7 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: except InvalidSpecifier: pass else: - return spec.contains(lhs) + return spec.contains(lhs, prereleases=True) oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: @@ -201,25 +122,19 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: return oper(lhs, rhs) -class Undefined: - pass - +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) -_undefined = Undefined() + # other environment markers don't have such standards + return values -def _get_env(environment: Dict[str, str], name: str) -> str: - value: Union[str, Undefined] = environment.get(name, _undefined) - - if isinstance(value, Undefined): - raise UndefinedEnvironmentName( - f"{name!r} does not exist in evaluation environment." - ) - - return value - - -def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: groups: List[List[bool]] = [[]] for marker in markers: @@ -231,12 +146,15 @@ def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: lhs, op, rhs = marker if isinstance(lhs, Variable): - lhs_value = _get_env(environment, lhs.value) + environment_key = lhs.value + lhs_value = environment[environment_key] rhs_value = rhs.value else: lhs_value = lhs.value - rhs_value = _get_env(environment, rhs.value) + environment_key = rhs.value + rhs_value = environment[environment_key] + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) groups[-1].append(_eval_op(lhs_value, op, rhs_value)) else: assert marker in ["and", "or"] @@ -274,13 +192,29 @@ def default_environment() -> Dict[str, str]: class Marker: def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. try: - self._markers = _coerce_parse_result(MARKER.parseString(marker)) - except ParseException as e: - raise InvalidMarker( - f"Invalid marker: {marker!r}, parse error at " - f"{marker[e.loc : e.loc + 8]!r}" - ) + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (, ')>, ), + # 'and', + # [ + # (, , ), + # 'or', + # (, , ) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e def __str__(self) -> str: return _format_marker(self._markers) @@ -288,6 +222,15 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"" + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: """Evaluate a marker. @@ -298,7 +241,12 @@ def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: The environment is determined from the current Python process. """ current_environment = default_environment() + current_environment["extra"] = "" if environment is not None: current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if current_environment["extra"] is None: + current_environment["extra"] = "" return _evaluate_markers(self._markers, current_environment) diff --git a/pip_api/_vendor/packaging/metadata.py b/pip_api/_vendor/packaging/metadata.py new file mode 100644 index 0000000..7b0e6a9 --- /dev/null +++ b/pip_api/_vendor/packaging/metadata.py @@ -0,0 +1,822 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import ( + Any, + Callable, + Dict, + Generic, + List, + Optional, + Tuple, + Type, + Union, + cast, +) + +from . import requirements, specifiers, utils, version as version_module + +T = typing.TypeVar("T") +if sys.version_info[:2] >= (3, 8): # pragma: no cover + from typing import Literal, TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from typing_extensions import Literal, TypedDict + else: + try: + from typing_extensions import Literal, TypedDict + except ImportError: + + class Literal: + def __init_subclass__(*_args, **_kwargs): + pass + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +try: + ExceptionGroup = __builtins__.ExceptionGroup # type: ignore[attr-defined] +except AttributeError: + + class ExceptionGroup(Exception): # type: ignore[no-redef] # noqa: N818 + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead. + """ + + message: str + exceptions: List[Exception] + + def __init__(self, message: str, exceptions: List[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. + + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + +_DICT_FIELDS = { + "project_urls", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separate keyboards into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in a accumulating dict. + # + # The other potentional issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) or [] + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibacked happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores it's data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. + # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. + try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). + """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: "Metadata", name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + try: + value = instance._raw[self.name] # type: ignore[literal-required] + except KeyError: + if self.name in _STRING_FIELDS: + value = "" + elif self.name in _LIST_FIELDS: + value = [] + elif self.name in _DICT_FIELDS: + value = {} + else: # pragma: no cover + assert False + + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Optional[Exception] = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. + try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. + if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: List[str]) -> List[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{value!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata(f"{value!r} is not a valid dynamic field") + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: List[str], + ) -> List[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_requires_dist( + self, + value: List[str], + ) -> List[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc) + else: + return reqs + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). + """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata": + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: List[InvalidMetadata] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. + try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + "{field} introduced in metadata version " + "{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email( + cls, data: Union[bytes, str], *, validate: bool = True + ) -> "Metadata": + """Parse metadata from email headers. + + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + exceptions: list[InvalidMetadata] = [] + raw, unparsed = parse_email(data) + + if validate: + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + exceptions.extend(exc_group.exceptions) + raise ExceptionGroup("invalid or unparsed metadata", exceptions) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[List[str]] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[List[str]] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[List[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[str] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[str] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[str] = _Validator(added="2.1") + """:external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[List[str]] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[str] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[str] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[str] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[str] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[str] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: _Validator[str] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[str] = _Validator() + """:external:ref:`core-metadata-license`""" + classifiers: _Validator[List[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[List[requirements.Requirement]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[specifiers.SpecifierSet] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[Dict[str, str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. + provides_extra: _Validator[List[utils.NormalizedName]] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[List[str]] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[List[str]] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[List[str]] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/pip_api/_vendor/packaging/requirements.py b/pip_api/_vendor/packaging/requirements.py index 303deef..0c00eba 100644 --- a/pip_api/_vendor/packaging/requirements.py +++ b/pip_api/_vendor/packaging/requirements.py @@ -2,26 +2,13 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -import re -import string -import urllib.parse -from typing import List, Optional as TOptional, Set - -from pip_api._vendor.pyparsing import ( # noqa - Combine, - Literal as L, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - originalTextFor, - stringEnd, - stringStart, -) - -from .markers import MARKER_EXPR, Marker -from .specifiers import LegacySpecifier, Specifier, SpecifierSet +from typing import Any, Iterator, Optional, Set + +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet +from .utils import canonicalize_name class InvalidRequirement(ValueError): @@ -30,60 +17,6 @@ class InvalidRequirement(ValueError): """ -ALPHANUM = Word(string.ascii_letters + string.digits) - -LBRACKET = L("[").suppress() -RBRACKET = L("]").suppress() -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() -COMMA = L(",").suppress() -SEMICOLON = L(";").suppress() -AT = L("@").suppress() - -PUNCTUATION = Word("-_.") -IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) -IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) - -NAME = IDENTIFIER("name") -EXTRA = IDENTIFIER - -URI = Regex(r"[^ ]+")("url") -URL = AT + URI - -EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) -EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") - -VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) -VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) - -VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY -VERSION_MANY = Combine( - VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False -)("_raw_spec") -_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) -_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") - -VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") -VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) - -MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") -MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start : t._original_end]) -) -MARKER_SEPARATOR = SEMICOLON -MARKER = MARKER_SEPARATOR + MARKER_EXPR - -VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) -URL_AND_MARKER = URL + Optional(MARKER) - -NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) - -REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd -# pyparsing isn't thread safe during initialization, so we do it eagerly, see -# issue #104 -REQUIREMENT.parseString("x[]") - - class Requirement: """Parse a requirement. @@ -99,48 +32,59 @@ class Requirement: def __init__(self, requirement_string: str) -> None: try: - req = REQUIREMENT.parseString(requirement_string) - except ParseException as e: - raise InvalidRequirement( - f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' - ) - - self.name: str = req.name - if req.url: - parsed_url = urllib.parse.urlparse(req.url) - if parsed_url.scheme == "file": - if urllib.parse.urlunparse(parsed_url) != req.url: - raise InvalidRequirement("Invalid URL given") - elif not (parsed_url.scheme and parsed_url.netloc) or ( - not parsed_url.scheme and not parsed_url.netloc - ): - raise InvalidRequirement(f"Invalid URL: {req.url}") - self.url: TOptional[str] = req.url - else: - self.url = None - self.extras: Set[str] = set(req.extras.asList() if req.extras else []) - self.specifier: SpecifierSet = SpecifierSet(req.specifier) - self.marker: TOptional[Marker] = req.marker if req.marker else None - - def __str__(self) -> str: - parts: List[str] = [self.name] + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + self.url: Optional[str] = parsed.url or None + self.extras: Set[str] = set(parsed.extras if parsed.extras else []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name if self.extras: formatted_extras = ",".join(sorted(self.extras)) - parts.append(f"[{formatted_extras}]") + yield f"[{formatted_extras}]" if self.specifier: - parts.append(str(self.specifier)) + yield str(self.specifier) if self.url: - parts.append(f"@ {self.url}") + yield f"@ {self.url}" if self.marker: - parts.append(" ") + yield " " if self.marker: - parts.append(f"; {self.marker}") + yield f"; {self.marker}" - return "".join(parts) + def __str__(self) -> str: + return "".join(self._iter_parts(self.name)) def __repr__(self) -> str: return f"" + + def __hash__(self) -> int: + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + canonicalize_name(self.name) == canonicalize_name(other.name) + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/pip_api/_vendor/packaging/specifiers.py b/pip_api/_vendor/packaging/specifiers.py index ce66bd4..984986b 100644 --- a/pip_api/_vendor/packaging/specifiers.py +++ b/pip_api/_vendor/packaging/specifiers.py @@ -1,20 +1,22 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. testsetup:: + + from pip_api._vendor.packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from pip_api._vendor.packaging.version import Version +""" import abc -import functools import itertools import re -import warnings from typing import ( Callable, - Dict, Iterable, Iterator, List, Optional, - Pattern, Set, Tuple, TypeVar, @@ -22,17 +24,28 @@ ) from .utils import canonicalize_version -from .version import LegacyVersion, Version, parse +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] -ParsedVersion = Union[Version, LegacyVersion] -UnparsedVersion = Union[Version, LegacyVersion, str] -VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) -CallableOperator = Callable[[ParsedVersion, str], bool] + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version class InvalidSpecifier(ValueError): """ - An invalid specifier was found, users should refer to PEP 440. + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' """ @@ -40,42 +53,39 @@ class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod def __str__(self) -> str: """ - Returns the str representation of this Specifier like object. This + Returns the str representation of this Specifier-like object. This should be representative of the Specifier itself. """ @abc.abstractmethod def __hash__(self) -> int: """ - Returns a hash value for this Specifier like object. + Returns a hash value for this Specifier-like object. """ @abc.abstractmethod def __eq__(self, other: object) -> bool: """ - Returns a boolean representing whether or not the two Specifier like + Returns a boolean representing whether or not the two Specifier-like objects are equal. - """ - @abc.abstractmethod - def __ne__(self, other: object) -> bool: - """ - Returns a boolean representing whether or not the two Specifier like - objects are not equal. + :param other: The other object to check against. """ - @abc.abstractproperty + @property + @abc.abstractmethod def prereleases(self) -> Optional[bool]: - """ - Returns whether or not pre-releases as a whole are allowed by this - specifier. + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. """ @prereleases.setter def prereleases(self, value: bool) -> None: - """ - Sets whether or not pre-releases as a whole are allowed by this - specifier. + """Setter for :attr:`prereleases`. + + :param value: The value to set. """ @abc.abstractmethod @@ -86,238 +96,28 @@ def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: @abc.abstractmethod def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. """ -class _IndividualSpecifier(BaseSpecifier): - - _operators: Dict[str, str] = {} - _regex: Pattern[str] - - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - match = self._regex.search(spec) - if not match: - raise InvalidSpecifier(f"Invalid specifier: '{spec}'") - - self._spec: Tuple[str, str] = ( - match.group("operator").strip(), - match.group("version").strip(), - ) - - # Store whether or not this Specifier should accept prereleases - self._prereleases = prereleases - - def __repr__(self) -> str: - pre = ( - f", prereleases={self.prereleases!r}" - if self._prereleases is not None - else "" - ) - - return "<{}({!r}{})>".format(self.__class__.__name__, str(self), pre) - - def __str__(self) -> str: - return "{}{}".format(*self._spec) - - @property - def _canonical_spec(self) -> Tuple[str, str]: - return self._spec[0], canonicalize_version(self._spec[1]) - - def __hash__(self) -> int: - return hash(self._canonical_spec) - - def __eq__(self, other: object) -> bool: - if isinstance(other, str): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._canonical_spec == other._canonical_spec - - def __ne__(self, other: object) -> bool: - if isinstance(other, str): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._spec != other._spec - - def _get_operator(self, op: str) -> CallableOperator: - operator_callable: CallableOperator = getattr( - self, f"_compare_{self._operators[op]}" - ) - return operator_callable - - def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: - if not isinstance(version, (LegacyVersion, Version)): - version = parse(version) - return version - - @property - def operator(self) -> str: - return self._spec[0] - - @property - def version(self) -> str: - return self._spec[1] - - @property - def prereleases(self) -> Optional[bool]: - return self._prereleases - - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value - - def __contains__(self, item: str) -> bool: - return self.contains(item) - - def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None - ) -> bool: - - # Determine if prereleases are to be allowed or not. - if prereleases is None: - prereleases = self.prereleases - - # Normalize item to a Version or LegacyVersion, this allows us to have - # a shortcut for ``"2.0" in Specifier(">=2") - normalized_item = self._coerce_version(item) - - # Determine if we should be supporting prereleases in this specifier - # or not, if we do not support prereleases than we can short circuit - # logic if this version is a prereleases. - if normalized_item.is_prerelease and not prereleases: - return False - - # Actually do the comparison to determine if this item is contained - # within this Specifier or not. - operator_callable: CallableOperator = self._get_operator(self.operator) - return operator_callable(normalized_item, self.version) - - def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - - yielded = False - found_prereleases = [] - - kw = {"prereleases": prereleases if prereleases is not None else True} - - # Attempt to iterate over all the values in the iterable and if any of - # them match, yield them. - for version in iterable: - parsed_version = self._coerce_version(version) - - if self.contains(parsed_version, **kw): - # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later in case nothing - # else matches this specifier. - if parsed_version.is_prerelease and not ( - prereleases or self.prereleases - ): - found_prereleases.append(version) - # Either this is not a prerelease, or we should have been - # accepting prereleases from the beginning. - else: - yielded = True - yield version - - # Now that we've iterated over everything, determine if we've yielded - # any values, and if we have not and we have any prereleases stored up - # then we will go ahead and yield the prereleases. - if not yielded and found_prereleases: - for version in found_prereleases: - yield version - - -class LegacySpecifier(_IndividualSpecifier): - - _regex_str = r""" - (?P(==|!=|<=|>=|<|>)) - \s* - (?P - [^,;\s)]* # Since this is a "legacy" specifier, and the version - # string can be just about anything, we match everything - # except for whitespace, a semi-colon for marker support, - # a closing paren since versions can be enclosed in - # them, and a comma since it's a version separator. - ) - """ - - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) - - _operators = { - "==": "equal", - "!=": "not_equal", - "<=": "less_than_equal", - ">=": "greater_than_equal", - "<": "less_than", - ">": "greater_than", - } +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - super().__init__(spec, prereleases) + .. tip:: - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: - if not isinstance(version, LegacyVersion): - version = LegacyVersion(str(version)) - return version - - def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective == self._coerce_version(spec) - - def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective != self._coerce_version(spec) - - def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective <= self._coerce_version(spec) - - def _compare_greater_than_equal( - self, prospective: LegacyVersion, spec: str - ) -> bool: - return prospective >= self._coerce_version(spec) - - def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective < self._coerce_version(spec) - - def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective > self._coerce_version(spec) - - -def _require_version_compare( - fn: Callable[["Specifier", ParsedVersion, str], bool] -) -> Callable[["Specifier", ParsedVersion, str], bool]: - @functools.wraps(fn) - def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: - if not isinstance(prospective, Version): - return False - return fn(self, prospective, spec) - - return wrapped - - -class Specifier(_IndividualSpecifier): + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ - _regex_str = r""" + _operator_regex_str = r""" (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" (?P (?: # The identity operators allow for an escape hatch that will @@ -327,8 +127,10 @@ class Specifier(_IndividualSpecifier): # but included entirely as an escape hatch. (?<====) # Only match for the identity operator \s* - [^\s]* # We just match everything, except for whitespace - # since we are only testing for strict identity. + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. ) | (?: @@ -341,23 +143,23 @@ class Specifier(_IndividualSpecifier): v? (?:[0-9]+!)? # epoch [0-9]+(?:\.[0-9]+)* # release - (?: # pre release - [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) - [-_\.]? - [0-9]* - )? - (?: # post release - (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) - )? - # You cannot use a wild card and a dev or local version - # together so group them with a | and make them optional. + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local - | - \.\* # Wild card syntax of .* )? ) | @@ -372,7 +174,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -397,7 +199,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)* # release (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -409,7 +211,10 @@ class Specifier(_IndividualSpecifier): ) """ - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$", + re.VERBOSE | re.IGNORECASE, + ) _operators = { "~=": "compatible", @@ -422,8 +227,153 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } - @_require_version_compare - def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to @@ -444,34 +394,35 @@ def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: prospective, prefix ) - @_require_version_compare - def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_equal(self, prospective: Version, spec: str) -> bool: # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. - prospective = Version(prospective.public) + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) # Split the spec out by dots, and pretend that there is an implicit # dot in between a release segment and a pre-release segment. - split_spec = _version_split(spec[:-2]) # Remove the trailing .* + split_spec = _version_split(normalized_spec) # Split the prospective version out by dots, and pretend that there # is an implicit dot in between a release segment and a pre-release # segment. - split_prospective = _version_split(str(prospective)) + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - shortened_prospective = split_prospective[: len(split_spec)] - - # Pad out our two sides with zeros so that they both equal the same - # length. - padded_spec, padded_prospective = _pad_version( - split_spec, shortened_prospective - ) + shortened_prospective = padded_prospective[: len(split_spec)] - return padded_prospective == padded_spec + return shortened_prospective == split_spec else: # Convert our spec string into a Version spec_version = Version(spec) @@ -484,30 +435,24 @@ def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: return prospective == spec_version - @_require_version_compare - def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: return not self._compare_equal(prospective, spec) - @_require_version_compare - def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) <= Version(spec) - @_require_version_compare - def _compare_greater_than_equal( - self, prospective: ParsedVersion, spec: str - ) -> bool: + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) >= Version(spec) - @_require_version_compare - def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -532,8 +477,7 @@ def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: # version in the spec. return True - @_require_version_compare - def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -567,34 +511,133 @@ def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bo def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() - @property - def prereleases(self) -> bool: + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. - # If there is an explicit prereleases set for this, then we'll just - # blindly use that. - if self._prereleases is not None: - return self._prereleases + :param item: The item to check for. - # Look at all of our specifiers and determine if they are inclusive - # operators, and if they are if they are including an explicit - # prerelease. - operator, version = self._spec - if operator in ["==", ">=", "<=", "~=", "==="]: - # The == specifier can include a trailing .*, if it does we - # want to remove before parsing. - if operator == "==" and version.endswith(".*"): - version = version[:-2] + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. - # Parse the version, and if it is a pre-release than this - # specifier allows pre-releases. - if parse(version).is_prerelease: - return True + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) - return False + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") @@ -636,22 +679,39 @@ def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + def __init__( self, specifiers: str = "", prereleases: Optional[bool] = None ) -> None: + """Initialize a SpecifierSet instance. + + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ - # Split on , to break each individual specifier into it's own item, and + # Split on `,` to break each individual specifier into it's own item, and # strip each item to remove leading/trailing whitespace. split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] # Parsed each individual specifier, attempting first to make it a - # Specifier and falling back to a LegacySpecifier. - parsed: Set[_IndividualSpecifier] = set() + # Specifier. + parsed: Set[Specifier] = set() for specifier in split_specifiers: - try: - parsed.add(Specifier(specifier)) - except InvalidSpecifier: - parsed.add(LegacySpecifier(specifier)) + parsed.add(Specifier(specifier)) # Turn our parsed specifiers into a frozen set and save them for later. self._specs = frozenset(parsed) @@ -660,22 +720,74 @@ def __init__( # we accept prereleases or not. self._prereleases = prereleases + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ pre = ( f", prereleases={self.prereleases!r}" if self._prereleases is not None else "" ) - return "".format(str(self), pre) + return f"" def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ return ",".join(sorted(str(s) for s in self._specs)) def __hash__(self) -> int: return hash(self._specs) def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ if isinstance(other, str): other = SpecifierSet(other) elif not isinstance(other, SpecifierSet): @@ -699,59 +811,98 @@ def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": return specifier def __eq__(self, other: object) -> bool: - if isinstance(other, (str, _IndividualSpecifier)): + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): other = SpecifierSet(str(other)) elif not isinstance(other, SpecifierSet): return NotImplemented return self._specs == other._specs - def __ne__(self, other: object) -> bool: - if isinstance(other, (str, _IndividualSpecifier)): - other = SpecifierSet(str(other)) - elif not isinstance(other, SpecifierSet): - return NotImplemented - - return self._specs != other._specs - def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" return len(self._specs) - def __iter__(self) -> Iterator[_IndividualSpecifier]: - return iter(self._specs) - - @property - def prereleases(self) -> Optional[bool]: - - # If we have been given an explicit prerelease modifier, then we'll - # pass that through here. - if self._prereleases is not None: - return self._prereleases - - # If we don't have any specifiers, and we don't have a forced value, - # then we'll just return None since we don't know if this should have - # pre-releases or not. - if not self._specs: - return None - - # Otherwise we'll see if any of the given specifiers accept - # prereleases, if any of them do we'll return True, otherwise False. - return any(s.prereleases for s in self._specs) + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ + return iter(self._specs) def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ return self.contains(item) def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, ) -> bool: - - # Ensure that our item is a Version or LegacyVersion instance. - if not isinstance(item, (LegacyVersion, Version)): - item = parse(item) + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. + if not isinstance(item, Version): + item = Version(item) # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the @@ -768,6 +919,9 @@ def contains( if not prereleases and item.is_prerelease: return False + if installed and item.is_prerelease: + item = Version(item.base_version) + # We simply dispatch to the underlying specs here to make sure that the # given version is contained within all of them. # Note: This use of all() here means that an empty set of specifiers @@ -775,9 +929,46 @@ def contains( return all(s.contains(item, prereleases=prereleases) for s in self._specs) def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the # SpecifierSet thinks for whether or not we should support prereleases. @@ -790,27 +981,16 @@ def filter( if self._specs: for spec in self._specs: iterable = spec.filter(iterable, prereleases=bool(prereleases)) - return iterable + return iter(iterable) # If we do not have any specifiers, then we need to have a rough filter # which will filter out any pre-releases, unless there are no final - # releases, and which will filter out LegacyVersion in general. + # releases. else: - filtered: List[VersionTypeVar] = [] - found_prereleases: List[VersionTypeVar] = [] - - item: UnparsedVersion - parsed_version: Union[Version, LegacyVersion] + filtered: List[UnparsedVersionVar] = [] + found_prereleases: List[UnparsedVersionVar] = [] for item in iterable: - # Ensure that we some kind of Version class for this item. - if not isinstance(item, (LegacyVersion, Version)): - parsed_version = parse(item) - else: - parsed_version = item - - # Filter out any item which is parsed as a LegacyVersion - if isinstance(parsed_version, LegacyVersion): - continue + parsed_version = _coerce_version(item) # Store any item which is a pre-release for later unless we've # already found a final version or we are accepting prereleases @@ -823,6 +1003,6 @@ def filter( # If we've found no items except for pre-releases, then we'll go # ahead and use the pre-releases if not filtered and found_prereleases and prereleases is None: - return found_prereleases + return iter(found_prereleases) - return filtered + return iter(filtered) diff --git a/pip_api/_vendor/packaging/tags.py b/pip_api/_vendor/packaging/tags.py index 82a47cd..37f33b1 100644 --- a/pip_api/_vendor/packaging/tags.py +++ b/pip_api/_vendor/packaging/tags.py @@ -4,6 +4,8 @@ import logging import platform +import struct +import subprocess import sys import sysconfig from importlib.machinery import EXTENSION_SUFFIXES @@ -36,7 +38,7 @@ } -_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 +_32_BIT_INTERPRETER = struct.calcsize("P") == 4 class Tag: @@ -90,7 +92,7 @@ def __str__(self) -> str: return f"{self._interpreter}-{self._abi}-{self._platform}" def __repr__(self) -> str: - return "<{self} @ {self_id}>".format(self=self, self_id=id(self)) + return f"<{self} @ {id(self)}>" def parse_tag(tag: str) -> FrozenSet[Tag]: @@ -110,7 +112,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]: def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: - value = sysconfig.get_config_var(name) + value: Union[int, str, None] = sysconfig.get_config_var(name) if value is None and warn: logger.debug( "Config variable '%s' is unset, Python ABI tag may be incorrect", name @@ -119,7 +121,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: def _normalize_string(string: str) -> str: - return string.replace(".", "_").replace("-", "_") + return string.replace(".", "_").replace("-", "_").replace(" ", "_") def _abi3_applies(python_version: PythonVersion) -> bool: @@ -192,7 +194,7 @@ def cpython_tags( if not python_version: python_version = sys.version_info[:2] - interpreter = "cp{}".format(_version_nodot(python_version[:2])) + interpreter = f"cp{_version_nodot(python_version[:2])}" if abis is None: if len(python_version) > 1: @@ -207,7 +209,7 @@ def cpython_tags( except ValueError: pass - platforms = list(platforms or _platform_tags()) + platforms = list(platforms or platform_tags()) for abi in abis: for platform_ in platforms: yield Tag(interpreter, abi, platform_) @@ -224,10 +226,45 @@ def cpython_tags( yield Tag(interpreter, "abi3", platform_) -def _generic_abi() -> Iterator[str]: - abi = sysconfig.get_config_var("SOABI") - if abi: - yield _normalize_string(abi) +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] def generic_tags( @@ -251,8 +288,9 @@ def generic_tags( interpreter = "".join([interp_name, interp_version]) if abis is None: abis = _generic_abi() - platforms = list(platforms or _platform_tags()) - abis = list(abis) + else: + abis = list(abis) + platforms = list(platforms or platform_tags()) if "none" not in abis: abis.append("none") for abi in abis: @@ -268,11 +306,11 @@ def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: all previous versions of that major version. """ if len(py_version) > 1: - yield "py{version}".format(version=_version_nodot(py_version[:2])) - yield "py{major}".format(major=py_version[0]) + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" if len(py_version) > 1: for minor in range(py_version[1] - 1, -1, -1): - yield "py{version}".format(version=_version_nodot((py_version[0], minor))) + yield f"py{_version_nodot((py_version[0], minor))}" def compatible_tags( @@ -290,7 +328,7 @@ def compatible_tags( """ if not python_version: python_version = sys.version_info[:2] - platforms = list(platforms or _platform_tags()) + platforms = list(platforms or platform_tags()) for version in _py_interpreter_range(python_version): for platform_ in platforms: yield Tag(version, "none", platform_) @@ -356,6 +394,22 @@ def mac_platforms( version_str, _, cpu_arch = platform.mac_ver() if version is None: version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + text=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) else: version = version if arch is None: @@ -416,22 +470,28 @@ def mac_platforms( def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: linux = _normalize_string(sysconfig.get_platform()) + if not linux.startswith("linux_"): + # we should never be here, just yield the sysconfig one and return + yield linux + return if is_32bit: if linux == "linux_x86_64": linux = "linux_i686" elif linux == "linux_aarch64": - linux = "linux_armv7l" + linux = "linux_armv8l" _, arch = linux.split("_", 1) - yield from _manylinux.platform_tags(linux, arch) - yield from _musllinux.platform_tags(arch) - yield linux + archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch]) + yield from _manylinux.platform_tags(archs) + yield from _musllinux.platform_tags(archs) + for arch in archs: + yield f"linux_{arch}" def _generic_platforms() -> Iterator[str]: yield _normalize_string(sysconfig.get_platform()) -def _platform_tags() -> Iterator[str]: +def platform_tags() -> Iterator[str]: """ Provides the platform tags for this installation. """ @@ -446,6 +506,9 @@ def _platform_tags() -> Iterator[str]: def interpreter_name() -> str: """ Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. """ name = sys.implementation.name return INTERPRETER_SHORT_NAMES.get(name) or name @@ -481,4 +544,10 @@ def sys_tags(*, warn: bool = False) -> Iterator[Tag]: else: yield from generic_tags() - yield from compatible_tags() + if interp_name == "pp": + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) + else: + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/pip_api/_vendor/packaging/utils.py b/pip_api/_vendor/packaging/utils.py index bab11b8..c2c2f75 100644 --- a/pip_api/_vendor/packaging/utils.py +++ b/pip_api/_vendor/packaging/utils.py @@ -12,6 +12,12 @@ NormalizedName = NewType("NormalizedName", str) +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + class InvalidWheelFilename(ValueError): """ An invalid wheel filename was found, users should refer to PEP 427. @@ -24,18 +30,31 @@ class InvalidSdistFilename(ValueError): """ +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) _canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") # PEP 427: The build number must start with a digit. _build_tag_regex = re.compile(r"(\d+)(.*)") -def canonicalize_name(name: str) -> NormalizedName: +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") # This is taken from PEP 503. value = _canonicalize_regex.sub("-", name).lower() return cast(NormalizedName, value) -def canonicalize_version(version: Union[Version, str]) -> str: +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: """ This is very similar to Version.__str__, but has one subtle difference with the way it handles the release segment. @@ -56,8 +75,11 @@ def canonicalize_version(version: Union[Version, str]) -> str: parts.append(f"{parsed.epoch}!") # Release segment - # NB: This strips trailing '.0's to normalize - parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) # Pre-release if parsed.pre is not None: @@ -95,11 +117,18 @@ def parse_wheel_filename( parts = filename.split("-", dashes - 2) name_part = parts[0] - # See PEP 427 for the rules on escaping the project name + # See PEP 427 for the rules on escaping the project name. if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: raise InvalidWheelFilename(f"Invalid project name: {filename}") name = canonicalize_name(name_part) - version = Version(parts[1]) + + try: + version = Version(parts[1]) + except InvalidVersion as e: + raise InvalidWheelFilename( + f"Invalid wheel filename (invalid version): {filename}" + ) from e + if dashes == 5: build_part = parts[2] build_match = _build_tag_regex.match(build_part) @@ -132,5 +161,12 @@ def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") name = canonicalize_name(name_part) - version = Version(version_part) + + try: + version = Version(version_part) + except InvalidVersion as e: + raise InvalidSdistFilename( + f"Invalid sdist filename (invalid version): {filename}" + ) from e + return (name, version) diff --git a/pip_api/_vendor/packaging/version.py b/pip_api/_vendor/packaging/version.py index de9a09a..91b7bee 100644 --- a/pip_api/_vendor/packaging/version.py +++ b/pip_api/_vendor/packaging/version.py @@ -1,64 +1,71 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. testsetup:: + + from pip_api._vendor.packaging.version import parse, Version +""" -import collections import itertools import re -import warnings -from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union +from typing import Any, Callable, NamedTuple, Optional, SupportsInt, Tuple, Union from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] -InfiniteTypes = Union[InfinityType, NegativeInfinityType] -PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] -SubLocalType = Union[InfiniteTypes, int, str] -LocalType = Union[ +LocalType = Tuple[Union[int, str], ...] + +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]] +CmpLocalType = Union[ NegativeInfinityType, - Tuple[ - Union[ - SubLocalType, - Tuple[SubLocalType, str], - Tuple[NegativeInfinityType, SubLocalType], - ], - ..., - ], + Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...], ] CmpKey = Tuple[ - int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType -] -LegacyCmpKey = Tuple[int, Tuple[str, ...]] -VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool + int, + Tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, ] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] -_Version = collections.namedtuple( - "_Version", ["epoch", "release", "dev", "pre", "post", "local"] -) +class _Version(NamedTuple): + epoch: int + release: Tuple[int, ...] + dev: Optional[Tuple[str, int]] + pre: Optional[Tuple[str, int]] + post: Optional[Tuple[str, int]] + local: Optional[LocalType] -def parse(version: str) -> Union["LegacyVersion", "Version"]: - """ - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. + +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) + return Version(version) class InvalidVersion(ValueError): - """ - An invalid version was found, users should refer to PEP 440. + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' """ class _BaseVersion: - _key: Union[CmpKey, LegacyCmpKey] + _key: Tuple[Any, ...] def __hash__(self) -> int: return hash(self._key) @@ -103,133 +110,16 @@ def __ne__(self, other: object) -> bool: return self._key != other._key -class LegacyVersion(_BaseVersion): - def __init__(self, version: str) -> None: - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def __str__(self) -> str: - return self._version - - def __repr__(self) -> str: - return f"" - - @property - def public(self) -> str: - return self._version - - @property - def base_version(self) -> str: - return self._version - - @property - def epoch(self) -> int: - return -1 - - @property - def release(self) -> None: - return None - - @property - def pre(self) -> None: - return None - - @property - def post(self) -> None: - return None - - @property - def dev(self) -> None: - return None - - @property - def local(self) -> None: - return None - - @property - def is_prerelease(self) -> bool: - return False - - @property - def is_postrelease(self) -> bool: - return False - - @property - def is_devrelease(self) -> bool: - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s: str) -> Iterator[str]: - for part in _legacy_version_component_re.split(s): - part = _legacy_version_replacement_map.get(part, part) - - if not part or part == ".": - continue - - if part[:1] in "0123456789": - # pad for numeric comparison - yield part.zfill(8) - else: - yield "*" + part - - # ensure that alpha/beta/candidate are before final - yield "*final" - - -def _legacy_cmpkey(version: str) -> LegacyCmpKey: - - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 - - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. - parts: List[str] = [] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() - - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() - - parts.append(part) - - return epoch, tuple(parts) - - # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? # epoch (?P[0-9]+(?:\.[0-9]+)*) # release segment (?P
                                          # pre-release
             [-_\.]?
-            (?P(a|b|c|rc|alpha|beta|pre|preview))
+            (?Palpha|a|beta|b|preview|pre|c|rc)
             [-_\.]?
             (?P[0-9]+)?
         )?
@@ -253,12 +143,56 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey:
     (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
 """
 
+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+
 
 class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    
+    >>> v2
+    
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """
 
     _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
 
     def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """
 
         # Validate the version and parse it into pieces
         match = self._regex.search(version)
@@ -288,9 +222,19 @@ def __init__(self, version: str) -> None:
         )
 
     def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        
+        """
         return f""
 
     def __str__(self) -> str:
+        """A string representation of the version that can be rounded-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
         parts = []
 
         # Epoch
@@ -320,29 +264,77 @@ def __str__(self) -> str:
 
     @property
     def epoch(self) -> int:
-        _epoch: int = self._version.epoch
-        return _epoch
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
+        return self._version.epoch
 
     @property
     def release(self) -> Tuple[int, ...]:
-        _release: Tuple[int, ...] = self._version.release
-        return _release
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
+        return self._version.release
 
     @property
     def pre(self) -> Optional[Tuple[str, int]]:
-        _pre: Optional[Tuple[str, int]] = self._version.pre
-        return _pre
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
+        return self._version.pre
 
     @property
     def post(self) -> Optional[int]:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
         return self._version.post[1] if self._version.post else None
 
     @property
     def dev(self) -> Optional[int]:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
         return self._version.dev[1] if self._version.dev else None
 
     @property
     def local(self) -> Optional[str]:
+        """The local version segment of the version.
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
         if self._version.local:
             return ".".join(str(x) for x in self._version.local)
         else:
@@ -350,10 +342,31 @@ def local(self) -> Optional[str]:
 
     @property
     def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1.2.3+abc.dev1").public
+        '1.2.3'
+        """
         return str(self).split("+", 1)[0]
 
     @property
     def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3+abc.dev1").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
         parts = []
 
         # Epoch
@@ -367,31 +380,77 @@ def base_version(self) -> str:
 
     @property
     def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
         return self.dev is not None or self.pre is not None
 
     @property
     def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
         return self.post is not None
 
     @property
     def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
         return self.dev is not None
 
     @property
     def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
         return self.release[0] if len(self.release) >= 1 else 0
 
     @property
     def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
         return self.release[1] if len(self.release) >= 2 else 0
 
     @property
     def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
         return self.release[2] if len(self.release) >= 3 else 0
 
 
 def _parse_letter_version(
-    letter: str, number: Union[str, bytes, SupportsInt]
+    letter: Optional[str], number: Union[str, bytes, SupportsInt, None]
 ) -> Optional[Tuple[str, int]]:
 
     if letter:
@@ -429,7 +488,7 @@ def _parse_letter_version(
 _local_version_separators = re.compile(r"[\._-]")
 
 
-def _parse_local_version(local: str) -> Optional[LocalType]:
+def _parse_local_version(local: Optional[str]) -> Optional[LocalType]:
     """
     Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
     """
@@ -447,7 +506,7 @@ def _cmpkey(
     pre: Optional[Tuple[str, int]],
     post: Optional[Tuple[str, int]],
     dev: Optional[Tuple[str, int]],
-    local: Optional[Tuple[SubLocalType]],
+    local: Optional[LocalType],
 ) -> CmpKey:
 
     # When we compare a release version, we want to compare it with all of the
@@ -464,7 +523,7 @@ def _cmpkey(
     # if there is not a pre or a post segment. If we have one of those then
     # the normal sorting rules will handle this case correctly.
     if pre is None and post is None and dev is not None:
-        _pre: PrePostDevType = NegativeInfinity
+        _pre: CmpPrePostDevType = NegativeInfinity
     # Versions without a pre-release (except as noted above) should sort after
     # those with one.
     elif pre is None:
@@ -474,21 +533,21 @@ def _cmpkey(
 
     # Versions without a post segment should sort before those with one.
     if post is None:
-        _post: PrePostDevType = NegativeInfinity
+        _post: CmpPrePostDevType = NegativeInfinity
 
     else:
         _post = post
 
     # Versions without a development segment should sort after those with one.
     if dev is None:
-        _dev: PrePostDevType = Infinity
+        _dev: CmpPrePostDevType = Infinity
 
     else:
         _dev = dev
 
     if local is None:
         # Versions without a local segment should sort before those with one.
-        _local: LocalType = NegativeInfinity
+        _local: CmpLocalType = NegativeInfinity
     else:
         # Versions with a local segment need that segment parsed to implement
         # the sorting rules in PEP440.
diff --git a/pip_api/_vendor/packaging_legacy/LICENSE b/pip_api/_vendor/packaging_legacy/LICENSE
new file mode 100644
index 0000000..6f62d44
--- /dev/null
+++ b/pip_api/_vendor/packaging_legacy/LICENSE
@@ -0,0 +1,3 @@
+This software is made available under the terms of *either* of the licenses
+found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
+under the terms of *both* these licenses.
diff --git a/pip_api/_vendor/packaging_legacy/LICENSE.APACHE b/pip_api/_vendor/packaging_legacy/LICENSE.APACHE
new file mode 100644
index 0000000..f433b1a
--- /dev/null
+++ b/pip_api/_vendor/packaging_legacy/LICENSE.APACHE
@@ -0,0 +1,177 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
diff --git a/pip_api/_vendor/packaging_legacy/LICENSE.BSD b/pip_api/_vendor/packaging_legacy/LICENSE.BSD
new file mode 100644
index 0000000..42ce7b7
--- /dev/null
+++ b/pip_api/_vendor/packaging_legacy/LICENSE.BSD
@@ -0,0 +1,23 @@
+Copyright (c) Donald Stufft and individual contributors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/pip_api/_vendor/packaging_legacy/__init__.py b/pip_api/_vendor/packaging_legacy/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pip_api/_vendor/packaging_legacy/version.py b/pip_api/_vendor/packaging_legacy/version.py
new file mode 100644
index 0000000..e387e79
--- /dev/null
+++ b/pip_api/_vendor/packaging_legacy/version.py
@@ -0,0 +1,135 @@
+import re
+from typing import Iterator, List, Tuple
+
+from pip_api._vendor.packaging.version import InvalidVersion, _BaseVersion, parse as _parse
+
+LegacyCmpKey = Tuple[int, Tuple[str, ...]]
+
+
+def parse(version: str) -> "_BaseVersion":
+    """
+    Parse the given version string and return either a :class:`Version` object
+    or a :class:`LegacyVersion` object depending on if the given version is
+    a valid PEP 440 version or a legacy version.
+    Parse the given version string.
+    Returns a :class:`Version` object, if the given version is a valid PEP 440 version.
+    Raises :class:`InvalidVersion` otherwise.
+    """
+    try:
+        return _parse(version)
+    except InvalidVersion:
+        return LegacyVersion(version)
+
+
+class LegacyVersion(_BaseVersion):
+
+    _key: LegacyCmpKey
+
+    def __init__(self, version: str) -> None:
+        self._version = str(version)
+        self._key: LegacyCmpKey = _legacy_cmpkey(self._version)
+
+    def __str__(self) -> str:
+        return self._version
+
+    def __repr__(self) -> str:
+        return f""
+
+    @property
+    def public(self) -> str:
+        return self._version
+
+    @property
+    def base_version(self) -> str:
+        return self._version
+
+    @property
+    def epoch(self) -> int:
+        return -1
+
+    @property
+    def release(self) -> None:
+        return None
+
+    @property
+    def pre(self) -> None:
+        return None
+
+    @property
+    def post(self) -> None:
+        return None
+
+    @property
+    def dev(self) -> None:
+        return None
+
+    @property
+    def local(self) -> None:
+        return None
+
+    @property
+    def is_prerelease(self) -> bool:
+        return False
+
+    @property
+    def is_postrelease(self) -> bool:
+        return False
+
+    @property
+    def is_devrelease(self) -> bool:
+        return False
+
+
+_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE)
+
+_legacy_version_replacement_map = {
+    "pre": "c",
+    "preview": "c",
+    "-": "final-",
+    "rc": "c",
+    "dev": "@",
+}
+
+
+def _parse_version_parts(s: str) -> Iterator[str]:
+    for part in _legacy_version_component_re.split(s):
+        part = _legacy_version_replacement_map.get(part, part)
+
+        if not part or part == ".":
+            continue
+
+        if part[:1] in "0123456789":
+            # pad for numeric comparison
+            yield part.zfill(8)
+        else:
+            yield "*" + part
+
+    # ensure that alpha/beta/candidate are before final
+    yield "*final"
+
+
+def _legacy_cmpkey(version: str) -> LegacyCmpKey:
+
+    # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch
+    # greater than or equal to 0. This will effectively put the LegacyVersion,
+    # which uses the defacto standard originally implemented by setuptools,
+    # as before all PEP 440 versions.
+    epoch = -1
+
+    # This scheme is taken from pkg_resources.parse_version setuptools prior to
+    # it's adoption of the packaging library.
+    parts: List[str] = []
+    for part in _parse_version_parts(version.lower()):
+        if part.startswith("*"):
+            # remove "-" before a prerelease tag
+            if part < "*final":
+                while parts and parts[-1] == "*final-":
+                    parts.pop()
+
+            # remove trailing zeros from each series of numeric parts
+            while parts and parts[-1] == "00000000":
+                parts.pop()
+
+        parts.append(part)
+
+    return epoch, tuple(parts)
diff --git a/pip_api/_vendor/pyparsing.LICENSE b/pip_api/_vendor/pyparsing.LICENSE
deleted file mode 100644
index 1bf9852..0000000
--- a/pip_api/_vendor/pyparsing.LICENSE
+++ /dev/null
@@ -1,18 +0,0 @@
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/pip_api/_vendor/pyparsing.py b/pip_api/_vendor/pyparsing.py
deleted file mode 100644
index 71d3959..0000000
--- a/pip_api/_vendor/pyparsing.py
+++ /dev/null
@@ -1,7107 +0,0 @@
-# -*- coding: utf-8 -*-
-# module pyparsing.py
-#
-# Copyright (c) 2003-2019  Paul T. McGuire
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
-__doc__ = \
-"""
-pyparsing module - Classes and methods to define and execute parsing grammars
-=============================================================================
-
-The pyparsing module is an alternative approach to creating and
-executing simple grammars, vs. the traditional lex/yacc approach, or the
-use of regular expressions.  With pyparsing, you don't need to learn
-a new syntax for defining grammars or matching expressions - the parsing
-module provides a library of classes that you use to construct the
-grammar directly in Python.
-
-Here is a program to parse "Hello, World!" (or any greeting of the form
-``", !"``), built up using :class:`Word`,
-:class:`Literal`, and :class:`And` elements
-(the :class:`'+'` operators create :class:`And` expressions,
-and the strings are auto-converted to :class:`Literal` expressions)::
-
-    from pip_api._vendor.pyparsing import Word, alphas
-
-    # define grammar of a greeting
-    greet = Word(alphas) + "," + Word(alphas) + "!"
-
-    hello = "Hello, World!"
-    print (hello, "->", greet.parseString(hello))
-
-The program outputs the following::
-
-    Hello, World! -> ['Hello', ',', 'World', '!']
-
-The Python representation of the grammar is quite readable, owing to the
-self-explanatory class names, and the use of '+', '|' and '^' operators.
-
-The :class:`ParseResults` object returned from
-:class:`ParserElement.parseString` can be
-accessed as a nested list, a dictionary, or an object with named
-attributes.
-
-The pyparsing module handles some of the problems that are typically
-vexing when writing text parsers:
-
-  - extra or missing whitespace (the above program will also handle
-    "Hello,World!", "Hello  ,  World  !", etc.)
-  - quoted strings
-  - embedded comments
-
-
-Getting Started -
------------------
-Visit the classes :class:`ParserElement` and :class:`ParseResults` to
-see the base classes that most other pyparsing
-classes inherit from. Use the docstrings for examples of how to:
-
- - construct literal match expressions from :class:`Literal` and
-   :class:`CaselessLiteral` classes
- - construct character word-group expressions using the :class:`Word`
-   class
- - see how to create repetitive expressions using :class:`ZeroOrMore`
-   and :class:`OneOrMore` classes
- - use :class:`'+'`, :class:`'|'`, :class:`'^'`,
-   and :class:`'&'` operators to combine simple expressions into
-   more complex ones
- - associate names with your parsed results using
-   :class:`ParserElement.setResultsName`
- - access the parsed data, which is returned as a :class:`ParseResults`
-   object
- - find some helpful expression short-cuts like :class:`delimitedList`
-   and :class:`oneOf`
- - find more useful common expressions in the :class:`pyparsing_common`
-   namespace class
-"""
-
-__version__ = "2.4.7"
-__versionTime__ = "30 Mar 2020 00:43 UTC"
-__author__ = "Paul McGuire "
-
-import string
-from weakref import ref as wkref
-import copy
-import sys
-import warnings
-import re
-import sre_constants
-import collections
-import pprint
-import traceback
-import types
-from datetime import datetime
-from operator import itemgetter
-import itertools
-from functools import wraps
-from contextlib import contextmanager
-
-try:
-    # Python 3
-    from itertools import filterfalse
-except ImportError:
-    from itertools import ifilterfalse as filterfalse
-
-try:
-    from _thread import RLock
-except ImportError:
-    from threading import RLock
-
-try:
-    # Python 3
-    from collections.abc import Iterable
-    from collections.abc import MutableMapping, Mapping
-except ImportError:
-    # Python 2.7
-    from collections import Iterable
-    from collections import MutableMapping, Mapping
-
-try:
-    from collections import OrderedDict as _OrderedDict
-except ImportError:
-    try:
-        from ordereddict import OrderedDict as _OrderedDict
-    except ImportError:
-        _OrderedDict = None
-
-try:
-    from types import SimpleNamespace
-except ImportError:
-    class SimpleNamespace: pass
-
-# version compatibility configuration
-__compat__ = SimpleNamespace()
-__compat__.__doc__ = """
-    A cross-version compatibility configuration for pyparsing features that will be
-    released in a future version. By setting values in this configuration to True,
-    those features can be enabled in prior versions for compatibility development
-    and testing.
-
-     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
-       of results names when an And expression is nested within an Or or MatchFirst; set to
-       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
-       pre-2.3.0 handling of named results
-"""
-__compat__.collect_all_And_tokens = True
-
-__diag__ = SimpleNamespace()
-__diag__.__doc__ = """
-Diagnostic configuration (all default to False)
-     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
-       name is defined on a MatchFirst or Or expression with one or more And subexpressions
-       (only warns if __compat__.collect_all_And_tokens is False)
-     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
-       name is defined on a containing expression with ungrouped subexpressions that also
-       have results names
-     - warn_name_set_on_empty_Forward - flag to enable warnings whan a Forward is defined
-       with a results name, but has no contents defined
-     - warn_on_multiple_string_args_to_oneof - flag to enable warnings whan oneOf is
-       incorrectly called with multiple str arguments
-     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
-       calls to ParserElement.setName()
-"""
-__diag__.warn_multiple_tokens_in_named_alternation = False
-__diag__.warn_ungrouped_named_tokens_in_collection = False
-__diag__.warn_name_set_on_empty_Forward = False
-__diag__.warn_on_multiple_string_args_to_oneof = False
-__diag__.enable_debug_on_named_expressions = False
-__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith("enable_") or nm.startswith("warn_")]
-
-def _enable_all_warnings():
-    __diag__.warn_multiple_tokens_in_named_alternation = True
-    __diag__.warn_ungrouped_named_tokens_in_collection = True
-    __diag__.warn_name_set_on_empty_Forward = True
-    __diag__.warn_on_multiple_string_args_to_oneof = True
-__diag__.enable_all_warnings = _enable_all_warnings
-
-
-__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
-           'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
-           'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
-           'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
-           'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
-           'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
-           'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
-           'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
-           'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
-           'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
-           'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
-           'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
-           'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
-           'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
-           'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
-           'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
-           'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-           'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
-           'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
-           'conditionAsParseAction', 're',
-           ]
-
-system_version = tuple(sys.version_info)[:3]
-PY_3 = system_version[0] == 3
-if PY_3:
-    _MAX_INT = sys.maxsize
-    basestring = str
-    unichr = chr
-    unicode = str
-    _ustr = str
-
-    # build list of single arg builtins, that can be used as parse actions
-    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
-
-else:
-    _MAX_INT = sys.maxint
-    range = xrange
-
-    def _ustr(obj):
-        """Drop-in replacement for str(obj) that tries to be Unicode
-        friendly. It first tries str(obj). If that fails with
-        a UnicodeEncodeError, then it tries unicode(obj). It then
-        < returns the unicode object | encodes it with the default
-        encoding | ... >.
-        """
-        if isinstance(obj, unicode):
-            return obj
-
-        try:
-            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
-            # it won't break any existing code.
-            return str(obj)
-
-        except UnicodeEncodeError:
-            # Else encode it
-            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
-            xmlcharref = Regex(r'&#\d+;')
-            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
-            return xmlcharref.transformString(ret)
-
-    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
-    singleArgBuiltins = []
-    import __builtin__
-
-    for fname in "sum len sorted reversed list tuple set any all min max".split():
-        try:
-            singleArgBuiltins.append(getattr(__builtin__, fname))
-        except AttributeError:
-            continue
-
-_generatorType = type((y for y in range(1)))
-
-def _xml_escape(data):
-    """Escape &, <, >, ", ', etc. in a string of data."""
-
-    # ampersand must be replaced first
-    from_symbols = '&><"\''
-    to_symbols = ('&' + s + ';' for s in "amp gt lt quot apos".split())
-    for from_, to_ in zip(from_symbols, to_symbols):
-        data = data.replace(from_, to_)
-    return data
-
-alphas = string.ascii_uppercase + string.ascii_lowercase
-nums = "0123456789"
-hexnums = nums + "ABCDEFabcdef"
-alphanums = alphas + nums
-_bslash = chr(92)
-printables = "".join(c for c in string.printable if c not in string.whitespace)
-
-
-def conditionAsParseAction(fn, message=None, fatal=False):
-    msg = message if message is not None else "failed user-defined condition"
-    exc_type = ParseFatalException if fatal else ParseException
-    fn = _trim_arity(fn)
-
-    @wraps(fn)
-    def pa(s, l, t):
-        if not bool(fn(s, l, t)):
-            raise exc_type(s, l, msg)
-
-    return pa
-
-class ParseBaseException(Exception):
-    """base exception class for all parsing runtime exceptions"""
-    # Performance tuning: we construct a *lot* of these, so keep this
-    # constructor as small and fast as possible
-    def __init__(self, pstr, loc=0, msg=None, elem=None):
-        self.loc = loc
-        if msg is None:
-            self.msg = pstr
-            self.pstr = ""
-        else:
-            self.msg = msg
-            self.pstr = pstr
-        self.parserElement = elem
-        self.args = (pstr, loc, msg)
-
-    @classmethod
-    def _from_exception(cls, pe):
-        """
-        internal factory method to simplify creating one type of ParseException
-        from another - avoids having __init__ signature conflicts among subclasses
-        """
-        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
-
-    def __getattr__(self, aname):
-        """supported attributes by name are:
-           - lineno - returns the line number of the exception text
-           - col - returns the column number of the exception text
-           - line - returns the line containing the exception text
-        """
-        if aname == "lineno":
-            return lineno(self.loc, self.pstr)
-        elif aname in ("col", "column"):
-            return col(self.loc, self.pstr)
-        elif aname == "line":
-            return line(self.loc, self.pstr)
-        else:
-            raise AttributeError(aname)
-
-    def __str__(self):
-        if self.pstr:
-            if self.loc >= len(self.pstr):
-                foundstr = ', found end of text'
-            else:
-                foundstr = (', found %r' % self.pstr[self.loc:self.loc + 1]).replace(r'\\', '\\')
-        else:
-            foundstr = ''
-        return ("%s%s  (at char %d), (line:%d, col:%d)" %
-                   (self.msg, foundstr, self.loc, self.lineno, self.column))
-    def __repr__(self):
-        return _ustr(self)
-    def markInputline(self, markerString=">!<"):
-        """Extracts the exception line from the input string, and marks
-           the location of the exception with a special symbol.
-        """
-        line_str = self.line
-        line_column = self.column - 1
-        if markerString:
-            line_str = "".join((line_str[:line_column],
-                                markerString, line_str[line_column:]))
-        return line_str.strip()
-    def __dir__(self):
-        return "lineno col line".split() + dir(type(self))
-
-class ParseException(ParseBaseException):
-    """
-    Exception thrown when parse expressions don't match class;
-    supported attributes by name are:
-    - lineno - returns the line number of the exception text
-    - col - returns the column number of the exception text
-    - line - returns the line containing the exception text
-
-    Example::
-
-        try:
-            Word(nums).setName("integer").parseString("ABC")
-        except ParseException as pe:
-            print(pe)
-            print("column: {}".format(pe.col))
-
-    prints::
-
-       Expected integer (at char 0), (line:1, col:1)
-        column: 1
-
-    """
-
-    @staticmethod
-    def explain(exc, depth=16):
-        """
-        Method to take an exception and translate the Python internal traceback into a list
-        of the pyparsing expressions that caused the exception to be raised.
-
-        Parameters:
-
-         - exc - exception raised during parsing (need not be a ParseException, in support
-           of Python exceptions that might be raised in a parse action)
-         - depth (default=16) - number of levels back in the stack trace to list expression
-           and function names; if None, the full stack trace names will be listed; if 0, only
-           the failing input line, marker, and exception string will be shown
-
-        Returns a multi-line string listing the ParserElements and/or function names in the
-        exception's stack trace.
-
-        Note: the diagnostic output will include string representations of the expressions
-        that failed to parse. These representations will be more helpful if you use `setName` to
-        give identifiable names to your expressions. Otherwise they will use the default string
-        forms, which may be cryptic to read.
-
-        explain() is only supported under Python 3.
-        """
-        import inspect
-
-        if depth is None:
-            depth = sys.getrecursionlimit()
-        ret = []
-        if isinstance(exc, ParseBaseException):
-            ret.append(exc.line)
-            ret.append(' ' * (exc.col - 1) + '^')
-        ret.append("{0}: {1}".format(type(exc).__name__, exc))
-
-        if depth > 0:
-            callers = inspect.getinnerframes(exc.__traceback__, context=depth)
-            seen = set()
-            for i, ff in enumerate(callers[-depth:]):
-                frm = ff[0]
-
-                f_self = frm.f_locals.get('self', None)
-                if isinstance(f_self, ParserElement):
-                    if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
-                        continue
-                    if f_self in seen:
-                        continue
-                    seen.add(f_self)
-
-                    self_type = type(f_self)
-                    ret.append("{0}.{1} - {2}".format(self_type.__module__,
-                                                      self_type.__name__,
-                                                      f_self))
-                elif f_self is not None:
-                    self_type = type(f_self)
-                    ret.append("{0}.{1}".format(self_type.__module__,
-                                                self_type.__name__))
-                else:
-                    code = frm.f_code
-                    if code.co_name in ('wrapper', ''):
-                        continue
-
-                    ret.append("{0}".format(code.co_name))
-
-                depth -= 1
-                if not depth:
-                    break
-
-        return '\n'.join(ret)
-
-
-class ParseFatalException(ParseBaseException):
-    """user-throwable exception thrown when inconsistent parse content
-       is found; stops all parsing immediately"""
-    pass
-
-class ParseSyntaxException(ParseFatalException):
-    """just like :class:`ParseFatalException`, but thrown internally
-    when an :class:`ErrorStop` ('-' operator) indicates
-    that parsing is to stop immediately because an unbacktrackable
-    syntax error has been found.
-    """
-    pass
-
-#~ class ReparseException(ParseBaseException):
-    #~ """Experimental class - parse actions can raise this exception to cause
-       #~ pyparsing to reparse the input string:
-        #~ - with a modified input string, and/or
-        #~ - with a modified start location
-       #~ Set the values of the ReparseException in the constructor, and raise the
-       #~ exception in a parse action to cause pyparsing to use the new string/location.
-       #~ Setting the values as None causes no change to be made.
-       #~ """
-    #~ def __init_( self, newstring, restartLoc ):
-        #~ self.newParseText = newstring
-        #~ self.reparseLoc = restartLoc
-
-class RecursiveGrammarException(Exception):
-    """exception thrown by :class:`ParserElement.validate` if the
-    grammar could be improperly recursive
-    """
-    def __init__(self, parseElementList):
-        self.parseElementTrace = parseElementList
-
-    def __str__(self):
-        return "RecursiveGrammarException: %s" % self.parseElementTrace
-
-class _ParseResultsWithOffset(object):
-    def __init__(self, p1, p2):
-        self.tup = (p1, p2)
-    def __getitem__(self, i):
-        return self.tup[i]
-    def __repr__(self):
-        return repr(self.tup[0])
-    def setOffset(self, i):
-        self.tup = (self.tup[0], i)
-
-class ParseResults(object):
-    """Structured parse results, to provide multiple means of access to
-    the parsed data:
-
-       - as a list (``len(results)``)
-       - by list index (``results[0], results[1]``, etc.)
-       - by attribute (``results.`` - see :class:`ParserElement.setResultsName`)
-
-    Example::
-
-        integer = Word(nums)
-        date_str = (integer.setResultsName("year") + '/'
-                        + integer.setResultsName("month") + '/'
-                        + integer.setResultsName("day"))
-        # equivalent form:
-        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
-        # parseString returns a ParseResults object
-        result = date_str.parseString("1999/12/31")
-
-        def test(s, fn=repr):
-            print("%s -> %s" % (s, fn(eval(s))))
-        test("list(result)")
-        test("result[0]")
-        test("result['month']")
-        test("result.day")
-        test("'month' in result")
-        test("'minutes' in result")
-        test("result.dump()", str)
-
-    prints::
-
-        list(result) -> ['1999', '/', '12', '/', '31']
-        result[0] -> '1999'
-        result['month'] -> '12'
-        result.day -> '31'
-        'month' in result -> True
-        'minutes' in result -> False
-        result.dump() -> ['1999', '/', '12', '/', '31']
-        - day: 31
-        - month: 12
-        - year: 1999
-    """
-    def __new__(cls, toklist=None, name=None, asList=True, modal=True):
-        if isinstance(toklist, cls):
-            return toklist
-        retobj = object.__new__(cls)
-        retobj.__doinit = True
-        return retobj
-
-    # Performance tuning: we construct a *lot* of these, so keep this
-    # constructor as small and fast as possible
-    def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
-        if self.__doinit:
-            self.__doinit = False
-            self.__name = None
-            self.__parent = None
-            self.__accumNames = {}
-            self.__asList = asList
-            self.__modal = modal
-            if toklist is None:
-                toklist = []
-            if isinstance(toklist, list):
-                self.__toklist = toklist[:]
-            elif isinstance(toklist, _generatorType):
-                self.__toklist = list(toklist)
-            else:
-                self.__toklist = [toklist]
-            self.__tokdict = dict()
-
-        if name is not None and name:
-            if not modal:
-                self.__accumNames[name] = 0
-            if isinstance(name, int):
-                name = _ustr(name)  # will always return a str, but use _ustr for consistency
-            self.__name = name
-            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])):
-                if isinstance(toklist, basestring):
-                    toklist = [toklist]
-                if asList:
-                    if isinstance(toklist, ParseResults):
-                        self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
-                    else:
-                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
-                    self[name].__name = name
-                else:
-                    try:
-                        self[name] = toklist[0]
-                    except (KeyError, TypeError, IndexError):
-                        self[name] = toklist
-
-    def __getitem__(self, i):
-        if isinstance(i, (int, slice)):
-            return self.__toklist[i]
-        else:
-            if i not in self.__accumNames:
-                return self.__tokdict[i][-1][0]
-            else:
-                return ParseResults([v[0] for v in self.__tokdict[i]])
-
-    def __setitem__(self, k, v, isinstance=isinstance):
-        if isinstance(v, _ParseResultsWithOffset):
-            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
-            sub = v[0]
-        elif isinstance(k, (int, slice)):
-            self.__toklist[k] = v
-            sub = v
-        else:
-            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
-            sub = v
-        if isinstance(sub, ParseResults):
-            sub.__parent = wkref(self)
-
-    def __delitem__(self, i):
-        if isinstance(i, (int, slice)):
-            mylen = len(self.__toklist)
-            del self.__toklist[i]
-
-            # convert int to slice
-            if isinstance(i, int):
-                if i < 0:
-                    i += mylen
-                i = slice(i, i + 1)
-            # get removed indices
-            removed = list(range(*i.indices(mylen)))
-            removed.reverse()
-            # fixup indices in token dictionary
-            for name, occurrences in self.__tokdict.items():
-                for j in removed:
-                    for k, (value, position) in enumerate(occurrences):
-                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
-        else:
-            del self.__tokdict[i]
-
-    def __contains__(self, k):
-        return k in self.__tokdict
-
-    def __len__(self):
-        return len(self.__toklist)
-
-    def __bool__(self):
-        return (not not self.__toklist)
-    __nonzero__ = __bool__
-
-    def __iter__(self):
-        return iter(self.__toklist)
-
-    def __reversed__(self):
-        return iter(self.__toklist[::-1])
-
-    def _iterkeys(self):
-        if hasattr(self.__tokdict, "iterkeys"):
-            return self.__tokdict.iterkeys()
-        else:
-            return iter(self.__tokdict)
-
-    def _itervalues(self):
-        return (self[k] for k in self._iterkeys())
-
-    def _iteritems(self):
-        return ((k, self[k]) for k in self._iterkeys())
-
-    if PY_3:
-        keys = _iterkeys
-        """Returns an iterator of all named result keys."""
-
-        values = _itervalues
-        """Returns an iterator of all named result values."""
-
-        items = _iteritems
-        """Returns an iterator of all named result key-value tuples."""
-
-    else:
-        iterkeys = _iterkeys
-        """Returns an iterator of all named result keys (Python 2.x only)."""
-
-        itervalues = _itervalues
-        """Returns an iterator of all named result values (Python 2.x only)."""
-
-        iteritems = _iteritems
-        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
-
-        def keys(self):
-            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
-            return list(self.iterkeys())
-
-        def values(self):
-            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
-            return list(self.itervalues())
-
-        def items(self):
-            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
-            return list(self.iteritems())
-
-    def haskeys(self):
-        """Since keys() returns an iterator, this method is helpful in bypassing
-           code that looks for the existence of any defined results names."""
-        return bool(self.__tokdict)
-
-    def pop(self, *args, **kwargs):
-        """
-        Removes and returns item at specified index (default= ``last``).
-        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
-        passed no argument or an integer argument, it will use ``list``
-        semantics and pop tokens from the list of parsed tokens. If passed
-        a non-integer argument (most likely a string), it will use ``dict``
-        semantics and pop the corresponding value from any defined results
-        names. A second default return value argument is supported, just as in
-        ``dict.pop()``.
-
-        Example::
-
-            def remove_first(tokens):
-                tokens.pop(0)
-            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
-            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
-
-            label = Word(alphas)
-            patt = label("LABEL") + OneOrMore(Word(nums))
-            print(patt.parseString("AAB 123 321").dump())
-
-            # Use pop() in a parse action to remove named result (note that corresponding value is not
-            # removed from list form of results)
-            def remove_LABEL(tokens):
-                tokens.pop("LABEL")
-                return tokens
-            patt.addParseAction(remove_LABEL)
-            print(patt.parseString("AAB 123 321").dump())
-
-        prints::
-
-            ['AAB', '123', '321']
-            - LABEL: AAB
-
-            ['AAB', '123', '321']
-        """
-        if not args:
-            args = [-1]
-        for k, v in kwargs.items():
-            if k == 'default':
-                args = (args[0], v)
-            else:
-                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
-        if (isinstance(args[0], int)
-                or len(args) == 1
-                or args[0] in self):
-            index = args[0]
-            ret = self[index]
-            del self[index]
-            return ret
-        else:
-            defaultvalue = args[1]
-            return defaultvalue
-
-    def get(self, key, defaultValue=None):
-        """
-        Returns named result matching the given key, or if there is no
-        such name, then returns the given ``defaultValue`` or ``None`` if no
-        ``defaultValue`` is specified.
-
-        Similar to ``dict.get()``.
-
-        Example::
-
-            integer = Word(nums)
-            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
-            result = date_str.parseString("1999/12/31")
-            print(result.get("year")) # -> '1999'
-            print(result.get("hour", "not specified")) # -> 'not specified'
-            print(result.get("hour")) # -> None
-        """
-        if key in self:
-            return self[key]
-        else:
-            return defaultValue
-
-    def insert(self, index, insStr):
-        """
-        Inserts new element at location index in the list of parsed tokens.
-
-        Similar to ``list.insert()``.
-
-        Example::
-
-            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
-
-            # use a parse action to insert the parse location in the front of the parsed results
-            def insert_locn(locn, tokens):
-                tokens.insert(0, locn)
-            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
-        """
-        self.__toklist.insert(index, insStr)
-        # fixup indices in token dictionary
-        for name, occurrences in self.__tokdict.items():
-            for k, (value, position) in enumerate(occurrences):
-                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
-
-    def append(self, item):
-        """
-        Add single element to end of ParseResults list of elements.
-
-        Example::
-
-            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
-
-            # use a parse action to compute the sum of the parsed integers, and add it to the end
-            def append_sum(tokens):
-                tokens.append(sum(map(int, tokens)))
-            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
-        """
-        self.__toklist.append(item)
-
-    def extend(self, itemseq):
-        """
-        Add sequence of elements to end of ParseResults list of elements.
-
-        Example::
-
-            patt = OneOrMore(Word(alphas))
-
-            # use a parse action to append the reverse of the matched strings, to make a palindrome
-            def make_palindrome(tokens):
-                tokens.extend(reversed([t[::-1] for t in tokens]))
-                return ''.join(tokens)
-            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
-        """
-        if isinstance(itemseq, ParseResults):
-            self.__iadd__(itemseq)
-        else:
-            self.__toklist.extend(itemseq)
-
-    def clear(self):
-        """
-        Clear all elements and results names.
-        """
-        del self.__toklist[:]
-        self.__tokdict.clear()
-
-    def __getattr__(self, name):
-        try:
-            return self[name]
-        except KeyError:
-            return ""
-
-    def __add__(self, other):
-        ret = self.copy()
-        ret += other
-        return ret
-
-    def __iadd__(self, other):
-        if other.__tokdict:
-            offset = len(self.__toklist)
-            addoffset = lambda a: offset if a < 0 else a + offset
-            otheritems = other.__tokdict.items()
-            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
-                              for k, vlist in otheritems for v in vlist]
-            for k, v in otherdictitems:
-                self[k] = v
-                if isinstance(v[0], ParseResults):
-                    v[0].__parent = wkref(self)
-
-        self.__toklist += other.__toklist
-        self.__accumNames.update(other.__accumNames)
-        return self
-
-    def __radd__(self, other):
-        if isinstance(other, int) and other == 0:
-            # useful for merging many ParseResults using sum() builtin
-            return self.copy()
-        else:
-            # this may raise a TypeError - so be it
-            return other + self
-
-    def __repr__(self):
-        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))
-
-    def __str__(self):
-        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
-
-    def _asStringList(self, sep=''):
-        out = []
-        for item in self.__toklist:
-            if out and sep:
-                out.append(sep)
-            if isinstance(item, ParseResults):
-                out += item._asStringList()
-            else:
-                out.append(_ustr(item))
-        return out
-
-    def asList(self):
-        """
-        Returns the parse results as a nested list of matching tokens, all converted to strings.
-
-        Example::
-
-            patt = OneOrMore(Word(alphas))
-            result = patt.parseString("sldkj lsdkj sldkj")
-            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
-            print(type(result), result) # ->  ['sldkj', 'lsdkj', 'sldkj']
-
-            # Use asList() to create an actual list
-            result_list = result.asList()
-            print(type(result_list), result_list) # ->  ['sldkj', 'lsdkj', 'sldkj']
-        """
-        return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist]
-
-    def asDict(self):
-        """
-        Returns the named parse results as a nested dictionary.
-
-        Example::
-
-            integer = Word(nums)
-            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
-            result = date_str.parseString('12/31/1999')
-            print(type(result), repr(result)) # ->  (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
-
-            result_dict = result.asDict()
-            print(type(result_dict), repr(result_dict)) # ->  {'day': '1999', 'year': '12', 'month': '31'}
-
-            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
-            import json
-            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
-            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
-        """
-        if PY_3:
-            item_fn = self.items
-        else:
-            item_fn = self.iteritems
-
-        def toItem(obj):
-            if isinstance(obj, ParseResults):
-                if obj.haskeys():
-                    return obj.asDict()
-                else:
-                    return [toItem(v) for v in obj]
-            else:
-                return obj
-
-        return dict((k, toItem(v)) for k, v in item_fn())
-
-    def copy(self):
-        """
-        Returns a new copy of a :class:`ParseResults` object.
-        """
-        ret = ParseResults(self.__toklist)
-        ret.__tokdict = dict(self.__tokdict.items())
-        ret.__parent = self.__parent
-        ret.__accumNames.update(self.__accumNames)
-        ret.__name = self.__name
-        return ret
-
-    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
-        """
-        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
-        """
-        nl = "\n"
-        out = []
-        namedItems = dict((v[1], k) for (k, vlist) in self.__tokdict.items()
-                          for v in vlist)
-        nextLevelIndent = indent + "  "
-
-        # collapse out indents if formatting is not desired
-        if not formatted:
-            indent = ""
-            nextLevelIndent = ""
-            nl = ""
-
-        selfTag = None
-        if doctag is not None:
-            selfTag = doctag
-        else:
-            if self.__name:
-                selfTag = self.__name
-
-        if not selfTag:
-            if namedItemsOnly:
-                return ""
-            else:
-                selfTag = "ITEM"
-
-        out += [nl, indent, "<", selfTag, ">"]
-
-        for i, res in enumerate(self.__toklist):
-            if isinstance(res, ParseResults):
-                if i in namedItems:
-                    out += [res.asXML(namedItems[i],
-                                      namedItemsOnly and doctag is None,
-                                      nextLevelIndent,
-                                      formatted)]
-                else:
-                    out += [res.asXML(None,
-                                      namedItemsOnly and doctag is None,
-                                      nextLevelIndent,
-                                      formatted)]
-            else:
-                # individual token, see if there is a name for it
-                resTag = None
-                if i in namedItems:
-                    resTag = namedItems[i]
-                if not resTag:
-                    if namedItemsOnly:
-                        continue
-                    else:
-                        resTag = "ITEM"
-                xmlBodyText = _xml_escape(_ustr(res))
-                out += [nl, nextLevelIndent, "<", resTag, ">",
-                        xmlBodyText,
-                                                ""]
-
-        out += [nl, indent, ""]
-        return "".join(out)
-
-    def __lookup(self, sub):
-        for k, vlist in self.__tokdict.items():
-            for v, loc in vlist:
-                if sub is v:
-                    return k
-        return None
-
-    def getName(self):
-        r"""
-        Returns the results name for this token expression. Useful when several
-        different expressions might match at a particular location.
-
-        Example::
-
-            integer = Word(nums)
-            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
-            house_number_expr = Suppress('#') + Word(nums, alphanums)
-            user_data = (Group(house_number_expr)("house_number")
-                        | Group(ssn_expr)("ssn")
-                        | Group(integer)("age"))
-            user_info = OneOrMore(user_data)
-
-            result = user_info.parseString("22 111-22-3333 #221B")
-            for item in result:
-                print(item.getName(), ':', item[0])
-
-        prints::
-
-            age : 22
-            ssn : 111-22-3333
-            house_number : 221B
-        """
-        if self.__name:
-            return self.__name
-        elif self.__parent:
-            par = self.__parent()
-            if par:
-                return par.__lookup(self)
-            else:
-                return None
-        elif (len(self) == 1
-              and len(self.__tokdict) == 1
-              and next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
-            return next(iter(self.__tokdict.keys()))
-        else:
-            return None
-
-    def dump(self, indent='', full=True, include_list=True, _depth=0):
-        """
-        Diagnostic method for listing out the contents of
-        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
-        that this string can be embedded in a nested display of other data.
-
-        Example::
-
-            integer = Word(nums)
-            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
-            result = date_str.parseString('12/31/1999')
-            print(result.dump())
-
-        prints::
-
-            ['12', '/', '31', '/', '1999']
-            - day: 1999
-            - month: 31
-            - year: 12
-        """
-        out = []
-        NL = '\n'
-        if include_list:
-            out.append(indent + _ustr(self.asList()))
-        else:
-            out.append('')
-
-        if full:
-            if self.haskeys():
-                items = sorted((str(k), v) for k, v in self.items())
-                for k, v in items:
-                    if out:
-                        out.append(NL)
-                    out.append("%s%s- %s: " % (indent, ('  ' * _depth), k))
-                    if isinstance(v, ParseResults):
-                        if v:
-                            out.append(v.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1))
-                        else:
-                            out.append(_ustr(v))
-                    else:
-                        out.append(repr(v))
-            elif any(isinstance(vv, ParseResults) for vv in self):
-                v = self
-                for i, vv in enumerate(v):
-                    if isinstance(vv, ParseResults):
-                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
-                                                            ('  ' * (_depth)),
-                                                            i,
-                                                            indent,
-                                                            ('  ' * (_depth + 1)),
-                                                            vv.dump(indent=indent,
-                                                                    full=full,
-                                                                    include_list=include_list,
-                                                                    _depth=_depth + 1)))
-                    else:
-                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
-                                                            ('  ' * (_depth)),
-                                                            i,
-                                                            indent,
-                                                            ('  ' * (_depth + 1)),
-                                                            _ustr(vv)))
-
-        return "".join(out)
-
-    def pprint(self, *args, **kwargs):
-        """
-        Pretty-printer for parsed results as a list, using the
-        `pprint `_ module.
-        Accepts additional positional or keyword args as defined for
-        `pprint.pprint `_ .
-
-        Example::
-
-            ident = Word(alphas, alphanums)
-            num = Word(nums)
-            func = Forward()
-            term = ident | num | Group('(' + func + ')')
-            func <<= ident + Group(Optional(delimitedList(term)))
-            result = func.parseString("fna a,b,(fnb c,d,200),100")
-            result.pprint(width=40)
-
-        prints::
-
-            ['fna',
-             ['a',
-              'b',
-              ['(', 'fnb', ['c', 'd', '200'], ')'],
-              '100']]
-        """
-        pprint.pprint(self.asList(), *args, **kwargs)
-
-    # add support for pickle protocol
-    def __getstate__(self):
-        return (self.__toklist,
-                (self.__tokdict.copy(),
-                 self.__parent is not None and self.__parent() or None,
-                 self.__accumNames,
-                 self.__name))
-
-    def __setstate__(self, state):
-        self.__toklist = state[0]
-        self.__tokdict, par, inAccumNames, self.__name = state[1]
-        self.__accumNames = {}
-        self.__accumNames.update(inAccumNames)
-        if par is not None:
-            self.__parent = wkref(par)
-        else:
-            self.__parent = None
-
-    def __getnewargs__(self):
-        return self.__toklist, self.__name, self.__asList, self.__modal
-
-    def __dir__(self):
-        return dir(type(self)) + list(self.keys())
-
-    @classmethod
-    def from_dict(cls, other, name=None):
-        """
-        Helper classmethod to construct a ParseResults from a dict, preserving the
-        name-value relations as results names. If an optional 'name' argument is
-        given, a nested ParseResults will be returned
-        """
-        def is_iterable(obj):
-            try:
-                iter(obj)
-            except Exception:
-                return False
-            else:
-                if PY_3:
-                    return not isinstance(obj, (str, bytes))
-                else:
-                    return not isinstance(obj, basestring)
-
-        ret = cls([])
-        for k, v in other.items():
-            if isinstance(v, Mapping):
-                ret += cls.from_dict(v, name=k)
-            else:
-                ret += cls([v], name=k, asList=is_iterable(v))
-        if name is not None:
-            ret = cls([ret], name=name)
-        return ret
-
-MutableMapping.register(ParseResults)
-
-def col (loc, strg):
-    """Returns current column within a string, counting newlines as line separators.
-   The first column is number 1.
-
-   Note: the default parsing behavior is to expand tabs in the input string
-   before starting the parsing process.  See
-   :class:`ParserElement.parseString` for more
-   information on parsing strings containing ```` s, and suggested
-   methods to maintain a consistent view of the parsed string, the parse
-   location, and line and column positions within the parsed string.
-   """
-    s = strg
-    return 1 if 0 < loc < len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
-
-def lineno(loc, strg):
-    """Returns current line number within a string, counting newlines as line separators.
-    The first line is number 1.
-
-    Note - the default parsing behavior is to expand tabs in the input string
-    before starting the parsing process.  See :class:`ParserElement.parseString`
-    for more information on parsing strings containing ```` s, and
-    suggested methods to maintain a consistent view of the parsed string, the
-    parse location, and line and column positions within the parsed string.
-    """
-    return strg.count("\n", 0, loc) + 1
-
-def line(loc, strg):
-    """Returns the line of text containing loc within a string, counting newlines as line separators.
-       """
-    lastCR = strg.rfind("\n", 0, loc)
-    nextCR = strg.find("\n", loc)
-    if nextCR >= 0:
-        return strg[lastCR + 1:nextCR]
-    else:
-        return strg[lastCR + 1:]
-
-def _defaultStartDebugAction(instring, loc, expr):
-    print(("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % (lineno(loc, instring), col(loc, instring))))
-
-def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
-    print("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
-
-def _defaultExceptionDebugAction(instring, loc, expr, exc):
-    print("Exception raised:" + _ustr(exc))
-
-def nullDebugAction(*args):
-    """'Do-nothing' debug action, to suppress debugging output during parsing."""
-    pass
-
-# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
-#~ 'decorator to trim function calls to match the arity of the target'
-#~ def _trim_arity(func, maxargs=3):
-    #~ if func in singleArgBuiltins:
-        #~ return lambda s,l,t: func(t)
-    #~ limit = 0
-    #~ foundArity = False
-    #~ def wrapper(*args):
-        #~ nonlocal limit,foundArity
-        #~ while 1:
-            #~ try:
-                #~ ret = func(*args[limit:])
-                #~ foundArity = True
-                #~ return ret
-            #~ except TypeError:
-                #~ if limit == maxargs or foundArity:
-                    #~ raise
-                #~ limit += 1
-                #~ continue
-    #~ return wrapper
-
-# this version is Python 2.x-3.x cross-compatible
-'decorator to trim function calls to match the arity of the target'
-def _trim_arity(func, maxargs=2):
-    if func in singleArgBuiltins:
-        return lambda s, l, t: func(t)
-    limit = [0]
-    foundArity = [False]
-
-    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
-    if system_version[:2] >= (3, 5):
-        def extract_stack(limit=0):
-            # special handling for Python 3.5.0 - extra deep call stack by 1
-            offset = -3 if system_version == (3, 5, 0) else -2
-            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
-            return [frame_summary[:2]]
-        def extract_tb(tb, limit=0):
-            frames = traceback.extract_tb(tb, limit=limit)
-            frame_summary = frames[-1]
-            return [frame_summary[:2]]
-    else:
-        extract_stack = traceback.extract_stack
-        extract_tb = traceback.extract_tb
-
-    # synthesize what would be returned by traceback.extract_stack at the call to
-    # user's parse action 'func', so that we don't incur call penalty at parse time
-
-    LINE_DIFF = 6
-    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
-    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
-    this_line = extract_stack(limit=2)[-1]
-    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)
-
-    def wrapper(*args):
-        while 1:
-            try:
-                ret = func(*args[limit[0]:])
-                foundArity[0] = True
-                return ret
-            except TypeError:
-                # re-raise TypeErrors if they did not come from our arity testing
-                if foundArity[0]:
-                    raise
-                else:
-                    try:
-                        tb = sys.exc_info()[-1]
-                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
-                            raise
-                    finally:
-                        try:
-                            del tb
-                        except NameError:
-                            pass
-
-                if limit[0] <= maxargs:
-                    limit[0] += 1
-                    continue
-                raise
-
-    # copy func name to wrapper for sensible debug output
-    func_name = ""
-    try:
-        func_name = getattr(func, '__name__',
-                            getattr(func, '__class__').__name__)
-    except Exception:
-        func_name = str(func)
-    wrapper.__name__ = func_name
-
-    return wrapper
-
-
-class ParserElement(object):
-    """Abstract base level parser element class."""
-    DEFAULT_WHITE_CHARS = " \n\t\r"
-    verbose_stacktrace = False
-
-    @staticmethod
-    def setDefaultWhitespaceChars(chars):
-        r"""
-        Overrides the default whitespace chars
-
-        Example::
-
-            # default whitespace chars are space,  and newline
-            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
-
-            # change to just treat newline as significant
-            ParserElement.setDefaultWhitespaceChars(" \t")
-            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
-        """
-        ParserElement.DEFAULT_WHITE_CHARS = chars
-
-    @staticmethod
-    def inlineLiteralsUsing(cls):
-        """
-        Set class to be used for inclusion of string literals into a parser.
-
-        Example::
-
-            # default literal class used is Literal
-            integer = Word(nums)
-            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
-            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
-
-
-            # change to Suppress
-            ParserElement.inlineLiteralsUsing(Suppress)
-            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
-            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
-        """
-        ParserElement._literalStringClass = cls
-
-    @classmethod
-    def _trim_traceback(cls, tb):
-        while tb.tb_next:
-            tb = tb.tb_next
-        return tb
-
-    def __init__(self, savelist=False):
-        self.parseAction = list()
-        self.failAction = None
-        # ~ self.name = ""  # don't define self.name, let subclasses try/except upcall
-        self.strRepr = None
-        self.resultsName = None
-        self.saveAsList = savelist
-        self.skipWhitespace = True
-        self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
-        self.copyDefaultWhiteChars = True
-        self.mayReturnEmpty = False # used when checking for left-recursion
-        self.keepTabs = False
-        self.ignoreExprs = list()
-        self.debug = False
-        self.streamlined = False
-        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
-        self.errmsg = ""
-        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
-        self.debugActions = (None, None, None)  # custom debug actions
-        self.re = None
-        self.callPreparse = True # used to avoid redundant calls to preParse
-        self.callDuringTry = False
-
-    def copy(self):
-        """
-        Make a copy of this :class:`ParserElement`.  Useful for defining
-        different parse actions for the same parsing pattern, using copies of
-        the original parse element.
-
-        Example::
-
-            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
-            integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
-            integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
-
-            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
-
-        prints::
-
-            [5120, 100, 655360, 268435456]
-
-        Equivalent form of ``expr.copy()`` is just ``expr()``::
-
-            integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
-        """
-        cpy = copy.copy(self)
-        cpy.parseAction = self.parseAction[:]
-        cpy.ignoreExprs = self.ignoreExprs[:]
-        if self.copyDefaultWhiteChars:
-            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
-        return cpy
-
-    def setName(self, name):
-        """
-        Define name for this expression, makes debugging and exception messages clearer.
-
-        Example::
-
-            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
-            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
-        """
-        self.name = name
-        self.errmsg = "Expected " + self.name
-        if __diag__.enable_debug_on_named_expressions:
-            self.setDebug()
-        return self
-
-    def setResultsName(self, name, listAllMatches=False):
-        """
-        Define name for referencing matching tokens as a nested attribute
-        of the returned parse results.
-        NOTE: this returns a *copy* of the original :class:`ParserElement` object;
-        this is so that the client can define a basic element, such as an
-        integer, and reference it in multiple places with different names.
-
-        You can also set results names using the abbreviated syntax,
-        ``expr("name")`` in place of ``expr.setResultsName("name")``
-        - see :class:`__call__`.
-
-        Example::
-
-            date_str = (integer.setResultsName("year") + '/'
-                        + integer.setResultsName("month") + '/'
-                        + integer.setResultsName("day"))
-
-            # equivalent form:
-            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-        """
-        return self._setResultsName(name, listAllMatches)
-
-    def _setResultsName(self, name, listAllMatches=False):
-        newself = self.copy()
-        if name.endswith("*"):
-            name = name[:-1]
-            listAllMatches = True
-        newself.resultsName = name
-        newself.modalResults = not listAllMatches
-        return newself
-
-    def setBreak(self, breakFlag=True):
-        """Method to invoke the Python pdb debugger when this element is
-           about to be parsed. Set ``breakFlag`` to True to enable, False to
-           disable.
-        """
-        if breakFlag:
-            _parseMethod = self._parse
-            def breaker(instring, loc, doActions=True, callPreParse=True):
-                import pdb
-                # this call to pdb.set_trace() is intentional, not a checkin error
-                pdb.set_trace()
-                return _parseMethod(instring, loc, doActions, callPreParse)
-            breaker._originalParseMethod = _parseMethod
-            self._parse = breaker
-        else:
-            if hasattr(self._parse, "_originalParseMethod"):
-                self._parse = self._parse._originalParseMethod
-        return self
-
-    def setParseAction(self, *fns, **kwargs):
-        """
-        Define one or more actions to perform when successfully matching parse element definition.
-        Parse action fn is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` ,
-        ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
-
-        - s   = the original string being parsed (see note below)
-        - loc = the location of the matching substring
-        - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object
-
-        If the functions in fns modify the tokens, they can return them as the return
-        value from fn, and the modified list of tokens will replace the original.
-        Otherwise, fn does not need to return any value.
-
-        If None is passed as the parse action, all previously added parse actions for this
-        expression are cleared.
-
-        Optional keyword arguments:
-        - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing
-
-        Note: the default parsing behavior is to expand tabs in the input string
-        before starting the parsing process.  See :class:`parseString for more
-        information on parsing strings containing ```` s, and suggested
-        methods to maintain a consistent view of the parsed string, the parse
-        location, and line and column positions within the parsed string.
-
-        Example::
-
-            integer = Word(nums)
-            date_str = integer + '/' + integer + '/' + integer
-
-            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
-
-            # use parse action to convert to ints at parse time
-            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
-            date_str = integer + '/' + integer + '/' + integer
-
-            # note that integer fields are now ints, not strings
-            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
-        """
-        if list(fns) == [None,]:
-            self.parseAction = []
-        else:
-            if not all(callable(fn) for fn in fns):
-                raise TypeError("parse actions must be callable")
-            self.parseAction = list(map(_trim_arity, list(fns)))
-            self.callDuringTry = kwargs.get("callDuringTry", False)
-        return self
-
-    def addParseAction(self, *fns, **kwargs):
-        """
-        Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.
-
-        See examples in :class:`copy`.
-        """
-        self.parseAction += list(map(_trim_arity, list(fns)))
-        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
-        return self
-
-    def addCondition(self, *fns, **kwargs):
-        """Add a boolean predicate function to expression's list of parse actions. See
-        :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
-        functions passed to ``addCondition`` need to return boolean success/fail of the condition.
-
-        Optional keyword arguments:
-        - message = define a custom message to be used in the raised exception
-        - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
-
-        Example::
-
-            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
-            year_int = integer.copy()
-            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
-            date_str = year_int + '/' + integer + '/' + integer
-
-            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
-        """
-        for fn in fns:
-            self.parseAction.append(conditionAsParseAction(fn, message=kwargs.get('message'),
-                                                           fatal=kwargs.get('fatal', False)))
-
-        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
-        return self
-
-    def setFailAction(self, fn):
-        """Define action to perform if parsing fails at this expression.
-           Fail acton fn is a callable function that takes the arguments
-           ``fn(s, loc, expr, err)`` where:
-           - s = string being parsed
-           - loc = location where expression match was attempted and failed
-           - expr = the parse expression that failed
-           - err = the exception thrown
-           The function returns no value.  It may throw :class:`ParseFatalException`
-           if it is desired to stop parsing immediately."""
-        self.failAction = fn
-        return self
-
-    def _skipIgnorables(self, instring, loc):
-        exprsFound = True
-        while exprsFound:
-            exprsFound = False
-            for e in self.ignoreExprs:
-                try:
-                    while 1:
-                        loc, dummy = e._parse(instring, loc)
-                        exprsFound = True
-                except ParseException:
-                    pass
-        return loc
-
-    def preParse(self, instring, loc):
-        if self.ignoreExprs:
-            loc = self._skipIgnorables(instring, loc)
-
-        if self.skipWhitespace:
-            wt = self.whiteChars
-            instrlen = len(instring)
-            while loc < instrlen and instring[loc] in wt:
-                loc += 1
-
-        return loc
-
-    def parseImpl(self, instring, loc, doActions=True):
-        return loc, []
-
-    def postParse(self, instring, loc, tokenlist):
-        return tokenlist
-
-    # ~ @profile
-    def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
-        TRY, MATCH, FAIL = 0, 1, 2
-        debugging = (self.debug)  # and doActions)
-
-        if debugging or self.failAction:
-            # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
-            if self.debugActions[TRY]:
-                self.debugActions[TRY](instring, loc, self)
-            try:
-                if callPreParse and self.callPreparse:
-                    preloc = self.preParse(instring, loc)
-                else:
-                    preloc = loc
-                tokensStart = preloc
-                if self.mayIndexError or preloc >= len(instring):
-                    try:
-                        loc, tokens = self.parseImpl(instring, preloc, doActions)
-                    except IndexError:
-                        raise ParseException(instring, len(instring), self.errmsg, self)
-                else:
-                    loc, tokens = self.parseImpl(instring, preloc, doActions)
-            except Exception as err:
-                # ~ print ("Exception raised:", err)
-                if self.debugActions[FAIL]:
-                    self.debugActions[FAIL](instring, tokensStart, self, err)
-                if self.failAction:
-                    self.failAction(instring, tokensStart, self, err)
-                raise
-        else:
-            if callPreParse and self.callPreparse:
-                preloc = self.preParse(instring, loc)
-            else:
-                preloc = loc
-            tokensStart = preloc
-            if self.mayIndexError or preloc >= len(instring):
-                try:
-                    loc, tokens = self.parseImpl(instring, preloc, doActions)
-                except IndexError:
-                    raise ParseException(instring, len(instring), self.errmsg, self)
-            else:
-                loc, tokens = self.parseImpl(instring, preloc, doActions)
-
-        tokens = self.postParse(instring, loc, tokens)
-
-        retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
-        if self.parseAction and (doActions or self.callDuringTry):
-            if debugging:
-                try:
-                    for fn in self.parseAction:
-                        try:
-                            tokens = fn(instring, tokensStart, retTokens)
-                        except IndexError as parse_action_exc:
-                            exc = ParseException("exception raised in parse action")
-                            exc.__cause__ = parse_action_exc
-                            raise exc
-
-                        if tokens is not None and tokens is not retTokens:
-                            retTokens = ParseResults(tokens,
-                                                      self.resultsName,
-                                                      asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
-                                                      modal=self.modalResults)
-                except Exception as err:
-                    # ~ print "Exception raised in user parse action:", err
-                    if self.debugActions[FAIL]:
-                        self.debugActions[FAIL](instring, tokensStart, self, err)
-                    raise
-            else:
-                for fn in self.parseAction:
-                    try:
-                        tokens = fn(instring, tokensStart, retTokens)
-                    except IndexError as parse_action_exc:
-                        exc = ParseException("exception raised in parse action")
-                        exc.__cause__ = parse_action_exc
-                        raise exc
-
-                    if tokens is not None and tokens is not retTokens:
-                        retTokens = ParseResults(tokens,
-                                                  self.resultsName,
-                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
-                                                  modal=self.modalResults)
-        if debugging:
-            # ~ print ("Matched", self, "->", retTokens.asList())
-            if self.debugActions[MATCH]:
-                self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)
-
-        return loc, retTokens
-
-    def tryParse(self, instring, loc):
-        try:
-            return self._parse(instring, loc, doActions=False)[0]
-        except ParseFatalException:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-    def canParseNext(self, instring, loc):
-        try:
-            self.tryParse(instring, loc)
-        except (ParseException, IndexError):
-            return False
-        else:
-            return True
-
-    class _UnboundedCache(object):
-        def __init__(self):
-            cache = {}
-            self.not_in_cache = not_in_cache = object()
-
-            def get(self, key):
-                return cache.get(key, not_in_cache)
-
-            def set(self, key, value):
-                cache[key] = value
-
-            def clear(self):
-                cache.clear()
-
-            def cache_len(self):
-                return len(cache)
-
-            self.get = types.MethodType(get, self)
-            self.set = types.MethodType(set, self)
-            self.clear = types.MethodType(clear, self)
-            self.__len__ = types.MethodType(cache_len, self)
-
-    if _OrderedDict is not None:
-        class _FifoCache(object):
-            def __init__(self, size):
-                self.not_in_cache = not_in_cache = object()
-
-                cache = _OrderedDict()
-
-                def get(self, key):
-                    return cache.get(key, not_in_cache)
-
-                def set(self, key, value):
-                    cache[key] = value
-                    while len(cache) > size:
-                        try:
-                            cache.popitem(False)
-                        except KeyError:
-                            pass
-
-                def clear(self):
-                    cache.clear()
-
-                def cache_len(self):
-                    return len(cache)
-
-                self.get = types.MethodType(get, self)
-                self.set = types.MethodType(set, self)
-                self.clear = types.MethodType(clear, self)
-                self.__len__ = types.MethodType(cache_len, self)
-
-    else:
-        class _FifoCache(object):
-            def __init__(self, size):
-                self.not_in_cache = not_in_cache = object()
-
-                cache = {}
-                key_fifo = collections.deque([], size)
-
-                def get(self, key):
-                    return cache.get(key, not_in_cache)
-
-                def set(self, key, value):
-                    cache[key] = value
-                    while len(key_fifo) > size:
-                        cache.pop(key_fifo.popleft(), None)
-                    key_fifo.append(key)
-
-                def clear(self):
-                    cache.clear()
-                    key_fifo.clear()
-
-                def cache_len(self):
-                    return len(cache)
-
-                self.get = types.MethodType(get, self)
-                self.set = types.MethodType(set, self)
-                self.clear = types.MethodType(clear, self)
-                self.__len__ = types.MethodType(cache_len, self)
-
-    # argument cache for optimizing repeated calls when backtracking through recursive expressions
-    packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
-    packrat_cache_lock = RLock()
-    packrat_cache_stats = [0, 0]
-
-    # this method gets repeatedly called during backtracking with the same arguments -
-    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
-    def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
-        HIT, MISS = 0, 1
-        lookup = (self, instring, loc, callPreParse, doActions)
-        with ParserElement.packrat_cache_lock:
-            cache = ParserElement.packrat_cache
-            value = cache.get(lookup)
-            if value is cache.not_in_cache:
-                ParserElement.packrat_cache_stats[MISS] += 1
-                try:
-                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
-                except ParseBaseException as pe:
-                    # cache a copy of the exception, without the traceback
-                    cache.set(lookup, pe.__class__(*pe.args))
-                    raise
-                else:
-                    cache.set(lookup, (value[0], value[1].copy()))
-                    return value
-            else:
-                ParserElement.packrat_cache_stats[HIT] += 1
-                if isinstance(value, Exception):
-                    raise value
-                return value[0], value[1].copy()
-
-    _parse = _parseNoCache
-
-    @staticmethod
-    def resetCache():
-        ParserElement.packrat_cache.clear()
-        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
-
-    _packratEnabled = False
-    @staticmethod
-    def enablePackrat(cache_size_limit=128):
-        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
-           Repeated parse attempts at the same string location (which happens
-           often in many complex grammars) can immediately return a cached value,
-           instead of re-executing parsing/validating code.  Memoizing is done of
-           both valid results and parsing exceptions.
-
-           Parameters:
-
-           - cache_size_limit - (default= ``128``) - if an integer value is provided
-             will limit the size of the packrat cache; if None is passed, then
-             the cache size will be unbounded; if 0 is passed, the cache will
-             be effectively disabled.
-
-           This speedup may break existing programs that use parse actions that
-           have side-effects.  For this reason, packrat parsing is disabled when
-           you first import pyparsing.  To activate the packrat feature, your
-           program must call the class method :class:`ParserElement.enablePackrat`.
-           For best results, call ``enablePackrat()`` immediately after
-           importing pyparsing.
-
-           Example::
-
-               from pip_api._vendor import pyparsing
-               pyparsing.ParserElement.enablePackrat()
-        """
-        if not ParserElement._packratEnabled:
-            ParserElement._packratEnabled = True
-            if cache_size_limit is None:
-                ParserElement.packrat_cache = ParserElement._UnboundedCache()
-            else:
-                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
-            ParserElement._parse = ParserElement._parseCache
-
-    def parseString(self, instring, parseAll=False):
-        """
-        Execute the parse expression with the given string.
-        This is the main interface to the client code, once the complete
-        expression has been built.
-
-        Returns the parsed data as a :class:`ParseResults` object, which may be
-        accessed as a list, or as a dict or object with attributes if the given parser
-        includes results names.
-
-        If you want the grammar to require that the entire input string be
-        successfully parsed, then set ``parseAll`` to True (equivalent to ending
-        the grammar with ``StringEnd()``).
-
-        Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
-        in order to report proper column numbers in parse actions.
-        If the input string contains tabs and
-        the grammar uses parse actions that use the ``loc`` argument to index into the
-        string being parsed, you can ensure you have a consistent view of the input
-        string by:
-
-        - calling ``parseWithTabs`` on your grammar before calling ``parseString``
-          (see :class:`parseWithTabs`)
-        - define your parse action using the full ``(s, loc, toks)`` signature, and
-          reference the input string using the parse action's ``s`` argument
-        - explictly expand the tabs in your input string before calling
-          ``parseString``
-
-        Example::
-
-            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
-            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
-        """
-        ParserElement.resetCache()
-        if not self.streamlined:
-            self.streamline()
-            # ~ self.saveAsList = True
-        for e in self.ignoreExprs:
-            e.streamline()
-        if not self.keepTabs:
-            instring = instring.expandtabs()
-        try:
-            loc, tokens = self._parse(instring, 0)
-            if parseAll:
-                loc = self.preParse(instring, loc)
-                se = Empty() + StringEnd()
-                se._parse(instring, loc)
-        except ParseBaseException as exc:
-            if ParserElement.verbose_stacktrace:
-                raise
-            else:
-                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
-                if getattr(exc, '__traceback__', None) is not None:
-                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
-                raise exc
-        else:
-            return tokens
-
-    def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
-        """
-        Scan the input string for expression matches.  Each match will return the
-        matching tokens, start location, and end location.  May be called with optional
-        ``maxMatches`` argument, to clip scanning after 'n' matches are found.  If
-        ``overlap`` is specified, then overlapping matches will be reported.
-
-        Note that the start and end locations are reported relative to the string
-        being parsed.  See :class:`parseString` for more information on parsing
-        strings with embedded tabs.
-
-        Example::
-
-            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
-            print(source)
-            for tokens, start, end in Word(alphas).scanString(source):
-                print(' '*start + '^'*(end-start))
-                print(' '*start + tokens[0])
-
-        prints::
-
-            sldjf123lsdjjkf345sldkjf879lkjsfd987
-            ^^^^^
-            sldjf
-                    ^^^^^^^
-                    lsdjjkf
-                              ^^^^^^
-                              sldkjf
-                                       ^^^^^^
-                                       lkjsfd
-        """
-        if not self.streamlined:
-            self.streamline()
-        for e in self.ignoreExprs:
-            e.streamline()
-
-        if not self.keepTabs:
-            instring = _ustr(instring).expandtabs()
-        instrlen = len(instring)
-        loc = 0
-        preparseFn = self.preParse
-        parseFn = self._parse
-        ParserElement.resetCache()
-        matches = 0
-        try:
-            while loc <= instrlen and matches < maxMatches:
-                try:
-                    preloc = preparseFn(instring, loc)
-                    nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
-                except ParseException:
-                    loc = preloc + 1
-                else:
-                    if nextLoc > loc:
-                        matches += 1
-                        yield tokens, preloc, nextLoc
-                        if overlap:
-                            nextloc = preparseFn(instring, loc)
-                            if nextloc > loc:
-                                loc = nextLoc
-                            else:
-                                loc += 1
-                        else:
-                            loc = nextLoc
-                    else:
-                        loc = preloc + 1
-        except ParseBaseException as exc:
-            if ParserElement.verbose_stacktrace:
-                raise
-            else:
-                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
-                if getattr(exc, '__traceback__', None) is not None:
-                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
-                raise exc
-
-    def transformString(self, instring):
-        """
-        Extension to :class:`scanString`, to modify matching text with modified tokens that may
-        be returned from a parse action.  To use ``transformString``, define a grammar and
-        attach a parse action to it that modifies the returned token list.
-        Invoking ``transformString()`` on a target string will then scan for matches,
-        and replace the matched text patterns according to the logic in the parse
-        action.  ``transformString()`` returns the resulting transformed string.
-
-        Example::
-
-            wd = Word(alphas)
-            wd.setParseAction(lambda toks: toks[0].title())
-
-            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
-
-        prints::
-
-            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
-        """
-        out = []
-        lastE = 0
-        # force preservation of s, to minimize unwanted transformation of string, and to
-        # keep string locs straight between transformString and scanString
-        self.keepTabs = True
-        try:
-            for t, s, e in self.scanString(instring):
-                out.append(instring[lastE:s])
-                if t:
-                    if isinstance(t, ParseResults):
-                        out += t.asList()
-                    elif isinstance(t, list):
-                        out += t
-                    else:
-                        out.append(t)
-                lastE = e
-            out.append(instring[lastE:])
-            out = [o for o in out if o]
-            return "".join(map(_ustr, _flatten(out)))
-        except ParseBaseException as exc:
-            if ParserElement.verbose_stacktrace:
-                raise
-            else:
-                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
-                if getattr(exc, '__traceback__', None) is not None:
-                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
-                raise exc
-
-    def searchString(self, instring, maxMatches=_MAX_INT):
-        """
-        Another extension to :class:`scanString`, simplifying the access to the tokens found
-        to match the given parse expression.  May be called with optional
-        ``maxMatches`` argument, to clip searching after 'n' matches are found.
-
-        Example::
-
-            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
-            cap_word = Word(alphas.upper(), alphas.lower())
-
-            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
-
-            # the sum() builtin can be used to merge results into a single ParseResults object
-            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
-
-        prints::
-
-            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
-            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
-        """
-        try:
-            return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)])
-        except ParseBaseException as exc:
-            if ParserElement.verbose_stacktrace:
-                raise
-            else:
-                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
-                if getattr(exc, '__traceback__', None) is not None:
-                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
-                raise exc
-
-    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
-        """
-        Generator method to split a string using the given expression as a separator.
-        May be called with optional ``maxsplit`` argument, to limit the number of splits;
-        and the optional ``includeSeparators`` argument (default= ``False``), if the separating
-        matching text should be included in the split results.
-
-        Example::
-
-            punc = oneOf(list(".,;:/-!?"))
-            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
-
-        prints::
-
-            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
-        """
-        splits = 0
-        last = 0
-        for t, s, e in self.scanString(instring, maxMatches=maxsplit):
-            yield instring[last:s]
-            if includeSeparators:
-                yield t[0]
-            last = e
-        yield instring[last:]
-
-    def __add__(self, other):
-        """
-        Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
-        converts them to :class:`Literal`s by default.
-
-        Example::
-
-            greet = Word(alphas) + "," + Word(alphas) + "!"
-            hello = "Hello, World!"
-            print (hello, "->", greet.parseString(hello))
-
-        prints::
-
-            Hello, World! -> ['Hello', ',', 'World', '!']
-
-        ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.
-
-            Literal('start') + ... + Literal('end')
-
-        is equivalent to:
-
-            Literal('start') + SkipTo('end')("_skipped*") + Literal('end')
-
-        Note that the skipped text is returned with '_skipped' as a results name,
-        and to support having multiple skips in the same parser, the value returned is
-        a list of all skipped text.
-        """
-        if other is Ellipsis:
-            return _PendingSkip(self)
-
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return And([self, other])
-
-    def __radd__(self, other):
-        """
-        Implementation of + operator when left operand is not a :class:`ParserElement`
-        """
-        if other is Ellipsis:
-            return SkipTo(self)("_skipped*") + self
-
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return other + self
-
-    def __sub__(self, other):
-        """
-        Implementation of - operator, returns :class:`And` with error stop
-        """
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return self + And._ErrorStop() + other
-
-    def __rsub__(self, other):
-        """
-        Implementation of - operator when left operand is not a :class:`ParserElement`
-        """
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return other - self
-
-    def __mul__(self, other):
-        """
-        Implementation of * operator, allows use of ``expr * 3`` in place of
-        ``expr + expr + expr``.  Expressions may also me multiplied by a 2-integer
-        tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
-        may also include ``None`` as in:
-         - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
-              to ``expr*n + ZeroOrMore(expr)``
-              (read as "at least n instances of ``expr``")
-         - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
-              (read as "0 to n instances of ``expr``")
-         - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
-         - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``
-
-        Note that ``expr*(None, n)`` does not raise an exception if
-        more than n exprs exist in the input stream; that is,
-        ``expr*(None, n)`` does not enforce a maximum number of expr
-        occurrences.  If this behavior is desired, then write
-        ``expr*(None, n) + ~expr``
-        """
-        if other is Ellipsis:
-            other = (0, None)
-        elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
-            other = ((0, ) + other[1:] + (None,))[:2]
-
-        if isinstance(other, int):
-            minElements, optElements = other, 0
-        elif isinstance(other, tuple):
-            other = tuple(o if o is not Ellipsis else None for o in other)
-            other = (other + (None, None))[:2]
-            if other[0] is None:
-                other = (0, other[1])
-            if isinstance(other[0], int) and other[1] is None:
-                if other[0] == 0:
-                    return ZeroOrMore(self)
-                if other[0] == 1:
-                    return OneOrMore(self)
-                else:
-                    return self * other[0] + ZeroOrMore(self)
-            elif isinstance(other[0], int) and isinstance(other[1], int):
-                minElements, optElements = other
-                optElements -= minElements
-            else:
-                raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects", type(other[0]), type(other[1]))
-        else:
-            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
-
-        if minElements < 0:
-            raise ValueError("cannot multiply ParserElement by negative value")
-        if optElements < 0:
-            raise ValueError("second tuple value must be greater or equal to first tuple value")
-        if minElements == optElements == 0:
-            raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")
-
-        if optElements:
-            def makeOptionalList(n):
-                if n > 1:
-                    return Optional(self + makeOptionalList(n - 1))
-                else:
-                    return Optional(self)
-            if minElements:
-                if minElements == 1:
-                    ret = self + makeOptionalList(optElements)
-                else:
-                    ret = And([self] * minElements) + makeOptionalList(optElements)
-            else:
-                ret = makeOptionalList(optElements)
-        else:
-            if minElements == 1:
-                ret = self
-            else:
-                ret = And([self] * minElements)
-        return ret
-
-    def __rmul__(self, other):
-        return self.__mul__(other)
-
-    def __or__(self, other):
-        """
-        Implementation of | operator - returns :class:`MatchFirst`
-        """
-        if other is Ellipsis:
-            return _PendingSkip(self, must_skip=True)
-
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return MatchFirst([self, other])
-
-    def __ror__(self, other):
-        """
-        Implementation of | operator when left operand is not a :class:`ParserElement`
-        """
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return other | self
-
-    def __xor__(self, other):
-        """
-        Implementation of ^ operator - returns :class:`Or`
-        """
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return Or([self, other])
-
-    def __rxor__(self, other):
-        """
-        Implementation of ^ operator when left operand is not a :class:`ParserElement`
-        """
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return other ^ self
-
-    def __and__(self, other):
-        """
-        Implementation of & operator - returns :class:`Each`
-        """
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return Each([self, other])
-
-    def __rand__(self, other):
-        """
-        Implementation of & operator when left operand is not a :class:`ParserElement`
-        """
-        if isinstance(other, basestring):
-            other = self._literalStringClass(other)
-        if not isinstance(other, ParserElement):
-            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
-                          SyntaxWarning, stacklevel=2)
-            return None
-        return other & self
-
-    def __invert__(self):
-        """
-        Implementation of ~ operator - returns :class:`NotAny`
-        """
-        return NotAny(self)
-
-    def __iter__(self):
-        # must implement __iter__ to override legacy use of sequential access to __getitem__ to
-        # iterate over a sequence
-        raise TypeError('%r object is not iterable' % self.__class__.__name__)
-
-    def __getitem__(self, key):
-        """
-        use ``[]`` indexing notation as a short form for expression repetition:
-         - ``expr[n]`` is equivalent to ``expr*n``
-         - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
-         - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
-              to ``expr*n + ZeroOrMore(expr)``
-              (read as "at least n instances of ``expr``")
-         - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
-              (read as "0 to n instances of ``expr``")
-         - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
-         - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
-         ``None`` may be used in place of ``...``.
-
-        Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception
-        if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
-        desired, then write ``expr[..., n] + ~expr``.
-       """
-
-        # convert single arg keys to tuples
-        try:
-            if isinstance(key, str):
-                key = (key,)
-            iter(key)
-        except TypeError:
-            key = (key, key)
-
-        if len(key) > 2:
-            warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
-                                                                                '... [{0}]'.format(len(key))
-                                                                                if len(key) > 5 else ''))
-
-        # clip to 2 elements
-        ret = self * tuple(key[:2])
-        return ret
-
-    def __call__(self, name=None):
-        """
-        Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
-
-        If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
-        passed as ``True``.
-
-        If ``name` is omitted, same as calling :class:`copy`.
-
-        Example::
-
-            # these are equivalent
-            userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
-            userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
-        """
-        if name is not None:
-            return self._setResultsName(name)
-        else:
-            return self.copy()
-
-    def suppress(self):
-        """
-        Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
-        cluttering up returned output.
-        """
-        return Suppress(self)
-
-    def leaveWhitespace(self):
-        """
-        Disables the skipping of whitespace before matching the characters in the
-        :class:`ParserElement`'s defined pattern.  This is normally only used internally by
-        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
-        """
-        self.skipWhitespace = False
-        return self
-
-    def setWhitespaceChars(self, chars):
-        """
-        Overrides the default whitespace chars
-        """
-        self.skipWhitespace = True
-        self.whiteChars = chars
-        self.copyDefaultWhiteChars = False
-        return self
-
-    def parseWithTabs(self):
-        """
-        Overrides default behavior to expand ````s to spaces before parsing the input string.
-        Must be called before ``parseString`` when the input grammar contains elements that
-        match ```` characters.
-        """
-        self.keepTabs = True
-        return self
-
-    def ignore(self, other):
-        """
-        Define expression to be ignored (e.g., comments) while doing pattern
-        matching; may be called repeatedly, to define multiple comment or other
-        ignorable patterns.
-
-        Example::
-
-            patt = OneOrMore(Word(alphas))
-            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
-
-            patt.ignore(cStyleComment)
-            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
-        """
-        if isinstance(other, basestring):
-            other = Suppress(other)
-
-        if isinstance(other, Suppress):
-            if other not in self.ignoreExprs:
-                self.ignoreExprs.append(other)
-        else:
-            self.ignoreExprs.append(Suppress(other.copy()))
-        return self
-
-    def setDebugActions(self, startAction, successAction, exceptionAction):
-        """
-        Enable display of debugging messages while doing pattern matching.
-        """
-        self.debugActions = (startAction or _defaultStartDebugAction,
-                             successAction or _defaultSuccessDebugAction,
-                             exceptionAction or _defaultExceptionDebugAction)
-        self.debug = True
-        return self
-
-    def setDebug(self, flag=True):
-        """
-        Enable display of debugging messages while doing pattern matching.
-        Set ``flag`` to True to enable, False to disable.
-
-        Example::
-
-            wd = Word(alphas).setName("alphaword")
-            integer = Word(nums).setName("numword")
-            term = wd | integer
-
-            # turn on debugging for wd
-            wd.setDebug()
-
-            OneOrMore(term).parseString("abc 123 xyz 890")
-
-        prints::
-
-            Match alphaword at loc 0(1,1)
-            Matched alphaword -> ['abc']
-            Match alphaword at loc 3(1,4)
-            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
-            Match alphaword at loc 7(1,8)
-            Matched alphaword -> ['xyz']
-            Match alphaword at loc 11(1,12)
-            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
-            Match alphaword at loc 15(1,16)
-            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
-
-        The output shown is that produced by the default debug actions - custom debug actions can be
-        specified using :class:`setDebugActions`. Prior to attempting
-        to match the ``wd`` expression, the debugging message ``"Match  at loc (,)"``
-        is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
-        message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
-        which makes debugging and exception messages easier to understand - for instance, the default
-        name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
-        """
-        if flag:
-            self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
-        else:
-            self.debug = False
-        return self
-
-    def __str__(self):
-        return self.name
-
-    def __repr__(self):
-        return _ustr(self)
-
-    def streamline(self):
-        self.streamlined = True
-        self.strRepr = None
-        return self
-
-    def checkRecursion(self, parseElementList):
-        pass
-
-    def validate(self, validateTrace=None):
-        """
-        Check defined expressions for valid structure, check for infinite recursive definitions.
-        """
-        self.checkRecursion([])
-
-    def parseFile(self, file_or_filename, parseAll=False):
-        """
-        Execute the parse expression on the given file or filename.
-        If a filename is specified (instead of a file object),
-        the entire file is opened, read, and closed before parsing.
-        """
-        try:
-            file_contents = file_or_filename.read()
-        except AttributeError:
-            with open(file_or_filename, "r") as f:
-                file_contents = f.read()
-        try:
-            return self.parseString(file_contents, parseAll)
-        except ParseBaseException as exc:
-            if ParserElement.verbose_stacktrace:
-                raise
-            else:
-                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
-                if getattr(exc, '__traceback__', None) is not None:
-                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
-                raise exc
-
-    def __eq__(self, other):
-        if self is other:
-            return True
-        elif isinstance(other, basestring):
-            return self.matches(other)
-        elif isinstance(other, ParserElement):
-            return vars(self) == vars(other)
-        return False
-
-    def __ne__(self, other):
-        return not (self == other)
-
-    def __hash__(self):
-        return id(self)
-
-    def __req__(self, other):
-        return self == other
-
-    def __rne__(self, other):
-        return not (self == other)
-
-    def matches(self, testString, parseAll=True):
-        """
-        Method for quick testing of a parser against a test string. Good for simple
-        inline microtests of sub expressions while building up larger parser.
-
-        Parameters:
-         - testString - to test against this expression for a match
-         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
-
-        Example::
-
-            expr = Word(nums)
-            assert expr.matches("100")
-        """
-        try:
-            self.parseString(_ustr(testString), parseAll=parseAll)
-            return True
-        except ParseBaseException:
-            return False
-
-    def runTests(self, tests, parseAll=True, comment='#',
-                 fullDump=True, printResults=True, failureTests=False, postParse=None,
-                 file=None):
-        """
-        Execute the parse expression on a series of test strings, showing each
-        test, the parsed results or where the parse failed. Quick and easy way to
-        run a parse expression against a list of sample strings.
-
-        Parameters:
-         - tests - a list of separate test strings, or a multiline string of test strings
-         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
-         - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
-              string; pass None to disable comment filtering
-         - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
-              if False, only dump nested list
-         - printResults - (default= ``True``) prints test output to stdout
-         - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
-         - postParse - (default= ``None``) optional callback for successful parse results; called as
-              `fn(test_string, parse_results)` and returns a string to be added to the test output
-         - file - (default=``None``) optional file-like object to which test output will be written;
-              if None, will default to ``sys.stdout``
-
-        Returns: a (success, results) tuple, where success indicates that all tests succeeded
-        (or failed if ``failureTests`` is True), and the results contain a list of lines of each
-        test's output
-
-        Example::
-
-            number_expr = pyparsing_common.number.copy()
-
-            result = number_expr.runTests('''
-                # unsigned integer
-                100
-                # negative integer
-                -100
-                # float with scientific notation
-                6.02e23
-                # integer with scientific notation
-                1e-12
-                ''')
-            print("Success" if result[0] else "Failed!")
-
-            result = number_expr.runTests('''
-                # stray character
-                100Z
-                # missing leading digit before '.'
-                -.100
-                # too many '.'
-                3.14.159
-                ''', failureTests=True)
-            print("Success" if result[0] else "Failed!")
-
-        prints::
-
-            # unsigned integer
-            100
-            [100]
-
-            # negative integer
-            -100
-            [-100]
-
-            # float with scientific notation
-            6.02e23
-            [6.02e+23]
-
-            # integer with scientific notation
-            1e-12
-            [1e-12]
-
-            Success
-
-            # stray character
-            100Z
-               ^
-            FAIL: Expected end of text (at char 3), (line:1, col:4)
-
-            # missing leading digit before '.'
-            -.100
-            ^
-            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
-
-            # too many '.'
-            3.14.159
-                ^
-            FAIL: Expected end of text (at char 4), (line:1, col:5)
-
-            Success
-
-        Each test string must be on a single line. If you want to test a string that spans multiple
-        lines, create a test like this::
-
-            expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
-
-        (Note that this is a raw string literal, you must include the leading 'r'.)
-        """
-        if isinstance(tests, basestring):
-            tests = list(map(str.strip, tests.rstrip().splitlines()))
-        if isinstance(comment, basestring):
-            comment = Literal(comment)
-        if file is None:
-            file = sys.stdout
-        print_ = file.write
-
-        allResults = []
-        comments = []
-        success = True
-        NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
-        BOM = u'\ufeff'
-        for t in tests:
-            if comment is not None and comment.matches(t, False) or comments and not t:
-                comments.append(t)
-                continue
-            if not t:
-                continue
-            out = ['\n' + '\n'.join(comments) if comments else '', t]
-            comments = []
-            try:
-                # convert newline marks to actual newlines, and strip leading BOM if present
-                t = NL.transformString(t.lstrip(BOM))
-                result = self.parseString(t, parseAll=parseAll)
-            except ParseBaseException as pe:
-                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
-                if '\n' in t:
-                    out.append(line(pe.loc, t))
-                    out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
-                else:
-                    out.append(' ' * pe.loc + '^' + fatal)
-                out.append("FAIL: " + str(pe))
-                success = success and failureTests
-                result = pe
-            except Exception as exc:
-                out.append("FAIL-EXCEPTION: " + str(exc))
-                success = success and failureTests
-                result = exc
-            else:
-                success = success and not failureTests
-                if postParse is not None:
-                    try:
-                        pp_value = postParse(t, result)
-                        if pp_value is not None:
-                            if isinstance(pp_value, ParseResults):
-                                out.append(pp_value.dump())
-                            else:
-                                out.append(str(pp_value))
-                        else:
-                            out.append(result.dump())
-                    except Exception as e:
-                        out.append(result.dump(full=fullDump))
-                        out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
-                else:
-                    out.append(result.dump(full=fullDump))
-
-            if printResults:
-                if fullDump:
-                    out.append('')
-                print_('\n'.join(out))
-
-            allResults.append((t, result))
-
-        return success, allResults
-
-
-class _PendingSkip(ParserElement):
-    # internal placeholder class to hold a place were '...' is added to a parser element,
-    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
-    def __init__(self, expr, must_skip=False):
-        super(_PendingSkip, self).__init__()
-        self.strRepr = str(expr + Empty()).replace('Empty', '...')
-        self.name = self.strRepr
-        self.anchor = expr
-        self.must_skip = must_skip
-
-    def __add__(self, other):
-        skipper = SkipTo(other).setName("...")("_skipped*")
-        if self.must_skip:
-            def must_skip(t):
-                if not t._skipped or t._skipped.asList() == ['']:
-                    del t[0]
-                    t.pop("_skipped", None)
-            def show_skip(t):
-                if t._skipped.asList()[-1:] == ['']:
-                    skipped = t.pop('_skipped')
-                    t['_skipped'] = 'missing <' + repr(self.anchor) + '>'
-            return (self.anchor + skipper().addParseAction(must_skip)
-                    | skipper().addParseAction(show_skip)) + other
-
-        return self.anchor + skipper + other
-
-    def __repr__(self):
-        return self.strRepr
-
-    def parseImpl(self, *args):
-        raise Exception("use of `...` expression without following SkipTo target expression")
-
-
-class Token(ParserElement):
-    """Abstract :class:`ParserElement` subclass, for defining atomic
-    matching patterns.
-    """
-    def __init__(self):
-        super(Token, self).__init__(savelist=False)
-
-
-class Empty(Token):
-    """An empty token, will always match.
-    """
-    def __init__(self):
-        super(Empty, self).__init__()
-        self.name = "Empty"
-        self.mayReturnEmpty = True
-        self.mayIndexError = False
-
-
-class NoMatch(Token):
-    """A token that will never match.
-    """
-    def __init__(self):
-        super(NoMatch, self).__init__()
-        self.name = "NoMatch"
-        self.mayReturnEmpty = True
-        self.mayIndexError = False
-        self.errmsg = "Unmatchable token"
-
-    def parseImpl(self, instring, loc, doActions=True):
-        raise ParseException(instring, loc, self.errmsg, self)
-
-
-class Literal(Token):
-    """Token to exactly match a specified string.
-
-    Example::
-
-        Literal('blah').parseString('blah')  # -> ['blah']
-        Literal('blah').parseString('blahfooblah')  # -> ['blah']
-        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"
-
-    For case-insensitive matching, use :class:`CaselessLiteral`.
-
-    For keyword matching (force word break before and after the matched string),
-    use :class:`Keyword` or :class:`CaselessKeyword`.
-    """
-    def __init__(self, matchString):
-        super(Literal, self).__init__()
-        self.match = matchString
-        self.matchLen = len(matchString)
-        try:
-            self.firstMatchChar = matchString[0]
-        except IndexError:
-            warnings.warn("null string passed to Literal; use Empty() instead",
-                            SyntaxWarning, stacklevel=2)
-            self.__class__ = Empty
-        self.name = '"%s"' % _ustr(self.match)
-        self.errmsg = "Expected " + self.name
-        self.mayReturnEmpty = False
-        self.mayIndexError = False
-
-        # Performance tuning: modify __class__ to select
-        # a parseImpl optimized for single-character check
-        if self.matchLen == 1 and type(self) is Literal:
-            self.__class__ = _SingleCharLiteral
-
-    def parseImpl(self, instring, loc, doActions=True):
-        if instring[loc] == self.firstMatchChar and instring.startswith(self.match, loc):
-            return loc + self.matchLen, self.match
-        raise ParseException(instring, loc, self.errmsg, self)
-
-class _SingleCharLiteral(Literal):
-    def parseImpl(self, instring, loc, doActions=True):
-        if instring[loc] == self.firstMatchChar:
-            return loc + 1, self.match
-        raise ParseException(instring, loc, self.errmsg, self)
-
-_L = Literal
-ParserElement._literalStringClass = Literal
-
-class Keyword(Token):
-    """Token to exactly match a specified string as a keyword, that is,
-    it must be immediately followed by a non-keyword character.  Compare
-    with :class:`Literal`:
-
-     - ``Literal("if")`` will match the leading ``'if'`` in
-       ``'ifAndOnlyIf'``.
-     - ``Keyword("if")`` will not; it will only match the leading
-       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``
-
-    Accepts two optional constructor arguments in addition to the
-    keyword string:
-
-     - ``identChars`` is a string of characters that would be valid
-       identifier characters, defaulting to all alphanumerics + "_" and
-       "$"
-     - ``caseless`` allows case-insensitive matching, default is ``False``.
-
-    Example::
-
-        Keyword("start").parseString("start")  # -> ['start']
-        Keyword("start").parseString("starting")  # -> Exception
-
-    For case-insensitive matching, use :class:`CaselessKeyword`.
-    """
-    DEFAULT_KEYWORD_CHARS = alphanums + "_$"
-
-    def __init__(self, matchString, identChars=None, caseless=False):
-        super(Keyword, self).__init__()
-        if identChars is None:
-            identChars = Keyword.DEFAULT_KEYWORD_CHARS
-        self.match = matchString
-        self.matchLen = len(matchString)
-        try:
-            self.firstMatchChar = matchString[0]
-        except IndexError:
-            warnings.warn("null string passed to Keyword; use Empty() instead",
-                          SyntaxWarning, stacklevel=2)
-        self.name = '"%s"' % self.match
-        self.errmsg = "Expected " + self.name
-        self.mayReturnEmpty = False
-        self.mayIndexError = False
-        self.caseless = caseless
-        if caseless:
-            self.caselessmatch = matchString.upper()
-            identChars = identChars.upper()
-        self.identChars = set(identChars)
-
-    def parseImpl(self, instring, loc, doActions=True):
-        if self.caseless:
-            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
-                    and (loc >= len(instring) - self.matchLen
-                         or instring[loc + self.matchLen].upper() not in self.identChars)
-                    and (loc == 0
-                         or instring[loc - 1].upper() not in self.identChars)):
-                return loc + self.matchLen, self.match
-
-        else:
-            if instring[loc] == self.firstMatchChar:
-                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
-                        and (loc >= len(instring) - self.matchLen
-                             or instring[loc + self.matchLen] not in self.identChars)
-                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
-                    return loc + self.matchLen, self.match
-
-        raise ParseException(instring, loc, self.errmsg, self)
-
-    def copy(self):
-        c = super(Keyword, self).copy()
-        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
-        return c
-
-    @staticmethod
-    def setDefaultKeywordChars(chars):
-        """Overrides the default Keyword chars
-        """
-        Keyword.DEFAULT_KEYWORD_CHARS = chars
-
-class CaselessLiteral(Literal):
-    """Token to match a specified string, ignoring case of letters.
-    Note: the matched results will always be in the case of the given
-    match string, NOT the case of the input text.
-
-    Example::
-
-        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
-
-    (Contrast with example for :class:`CaselessKeyword`.)
-    """
-    def __init__(self, matchString):
-        super(CaselessLiteral, self).__init__(matchString.upper())
-        # Preserve the defining literal.
-        self.returnString = matchString
-        self.name = "'%s'" % self.returnString
-        self.errmsg = "Expected " + self.name
-
-    def parseImpl(self, instring, loc, doActions=True):
-        if instring[loc:loc + self.matchLen].upper() == self.match:
-            return loc + self.matchLen, self.returnString
-        raise ParseException(instring, loc, self.errmsg, self)
-
-class CaselessKeyword(Keyword):
-    """
-    Caseless version of :class:`Keyword`.
-
-    Example::
-
-        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
-
-    (Contrast with example for :class:`CaselessLiteral`.)
-    """
-    def __init__(self, matchString, identChars=None):
-        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
-
-class CloseMatch(Token):
-    """A variation on :class:`Literal` which matches "close" matches,
-    that is, strings with at most 'n' mismatching characters.
-    :class:`CloseMatch` takes parameters:
-
-     - ``match_string`` - string to be matched
-     - ``maxMismatches`` - (``default=1``) maximum number of
-       mismatches allowed to count as a match
-
-    The results from a successful parse will contain the matched text
-    from the input string and the following named results:
-
-     - ``mismatches`` - a list of the positions within the
-       match_string where mismatches were found
-     - ``original`` - the original match_string used to compare
-       against the input string
-
-    If ``mismatches`` is an empty list, then the match was an exact
-    match.
-
-    Example::
-
-        patt = CloseMatch("ATCATCGAATGGA")
-        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
-        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
-
-        # exact match
-        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
-
-        # close match allowing up to 2 mismatches
-        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
-        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
-    """
-    def __init__(self, match_string, maxMismatches=1):
-        super(CloseMatch, self).__init__()
-        self.name = match_string
-        self.match_string = match_string
-        self.maxMismatches = maxMismatches
-        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
-        self.mayIndexError = False
-        self.mayReturnEmpty = False
-
-    def parseImpl(self, instring, loc, doActions=True):
-        start = loc
-        instrlen = len(instring)
-        maxloc = start + len(self.match_string)
-
-        if maxloc <= instrlen:
-            match_string = self.match_string
-            match_stringloc = 0
-            mismatches = []
-            maxMismatches = self.maxMismatches
-
-            for match_stringloc, s_m in enumerate(zip(instring[loc:maxloc], match_string)):
-                src, mat = s_m
-                if src != mat:
-                    mismatches.append(match_stringloc)
-                    if len(mismatches) > maxMismatches:
-                        break
-            else:
-                loc = match_stringloc + 1
-                results = ParseResults([instring[start:loc]])
-                results['original'] = match_string
-                results['mismatches'] = mismatches
-                return loc, results
-
-        raise ParseException(instring, loc, self.errmsg, self)
-
-
-class Word(Token):
-    """Token for matching words composed of allowed character sets.
-    Defined with string containing all allowed initial characters, an
-    optional string containing allowed body characters (if omitted,
-    defaults to the initial character set), and an optional minimum,
-    maximum, and/or exact length.  The default value for ``min`` is
-    1 (a minimum value < 1 is not valid); the default values for
-    ``max`` and ``exact`` are 0, meaning no maximum or exact
-    length restriction. An optional ``excludeChars`` parameter can
-    list characters that might be found in the input ``bodyChars``
-    string; useful to define a word of all printables except for one or
-    two characters, for instance.
-
-    :class:`srange` is useful for defining custom character set strings
-    for defining ``Word`` expressions, using range notation from
-    regular expression character sets.
-
-    A common mistake is to use :class:`Word` to match a specific literal
-    string, as in ``Word("Address")``. Remember that :class:`Word`
-    uses the string argument to define *sets* of matchable characters.
-    This expression would match "Add", "AAA", "dAred", or any other word
-    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
-    exact literal string, use :class:`Literal` or :class:`Keyword`.
-
-    pyparsing includes helper strings for building Words:
-
-     - :class:`alphas`
-     - :class:`nums`
-     - :class:`alphanums`
-     - :class:`hexnums`
-     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
-       - accented, tilded, umlauted, etc.)
-     - :class:`punc8bit` (non-alphabetic characters in ASCII range
-       128-255 - currency, symbols, superscripts, diacriticals, etc.)
-     - :class:`printables` (any non-whitespace character)
-
-    Example::
-
-        # a word composed of digits
-        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
-
-        # a word with a leading capital, and zero or more lowercase
-        capital_word = Word(alphas.upper(), alphas.lower())
-
-        # hostnames are alphanumeric, with leading alpha, and '-'
-        hostname = Word(alphas, alphanums + '-')
-
-        # roman numeral (not a strict parser, accepts invalid mix of characters)
-        roman = Word("IVXLCDM")
-
-        # any string of non-whitespace characters, except for ','
-        csv_value = Word(printables, excludeChars=",")
-    """
-    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
-        super(Word, self).__init__()
-        if excludeChars:
-            excludeChars = set(excludeChars)
-            initChars = ''.join(c for c in initChars if c not in excludeChars)
-            if bodyChars:
-                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
-        self.initCharsOrig = initChars
-        self.initChars = set(initChars)
-        if bodyChars:
-            self.bodyCharsOrig = bodyChars
-            self.bodyChars = set(bodyChars)
-        else:
-            self.bodyCharsOrig = initChars
-            self.bodyChars = set(initChars)
-
-        self.maxSpecified = max > 0
-
-        if min < 1:
-            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
-
-        self.minLen = min
-
-        if max > 0:
-            self.maxLen = max
-        else:
-            self.maxLen = _MAX_INT
-
-        if exact > 0:
-            self.maxLen = exact
-            self.minLen = exact
-
-        self.name = _ustr(self)
-        self.errmsg = "Expected " + self.name
-        self.mayIndexError = False
-        self.asKeyword = asKeyword
-
-        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
-            if self.bodyCharsOrig == self.initCharsOrig:
-                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
-            elif len(self.initCharsOrig) == 1:
-                self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig),
-                                             _escapeRegexRangeChars(self.bodyCharsOrig),)
-            else:
-                self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
-                                               _escapeRegexRangeChars(self.bodyCharsOrig),)
-            if self.asKeyword:
-                self.reString = r"\b" + self.reString + r"\b"
-
-            try:
-                self.re = re.compile(self.reString)
-            except Exception:
-                self.re = None
-            else:
-                self.re_match = self.re.match
-                self.__class__ = _WordRegex
-
-    def parseImpl(self, instring, loc, doActions=True):
-        if instring[loc] not in self.initChars:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        start = loc
-        loc += 1
-        instrlen = len(instring)
-        bodychars = self.bodyChars
-        maxloc = start + self.maxLen
-        maxloc = min(maxloc, instrlen)
-        while loc < maxloc and instring[loc] in bodychars:
-            loc += 1
-
-        throwException = False
-        if loc - start < self.minLen:
-            throwException = True
-        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
-            throwException = True
-        elif self.asKeyword:
-            if (start > 0 and instring[start - 1] in bodychars
-                    or loc < instrlen and instring[loc] in bodychars):
-                throwException = True
-
-        if throwException:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        return loc, instring[start:loc]
-
-    def __str__(self):
-        try:
-            return super(Word, self).__str__()
-        except Exception:
-            pass
-
-        if self.strRepr is None:
-
-            def charsAsStr(s):
-                if len(s) > 4:
-                    return s[:4] + "..."
-                else:
-                    return s
-
-            if self.initCharsOrig != self.bodyCharsOrig:
-                self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
-            else:
-                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
-
-        return self.strRepr
-
-class _WordRegex(Word):
-    def parseImpl(self, instring, loc, doActions=True):
-        result = self.re_match(instring, loc)
-        if not result:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        loc = result.end()
-        return loc, result.group()
-
-
-class Char(_WordRegex):
-    """A short-cut class for defining ``Word(characters, exact=1)``,
-    when defining a match of any single character in a string of
-    characters.
-    """
-    def __init__(self, charset, asKeyword=False, excludeChars=None):
-        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
-        self.reString = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
-        if asKeyword:
-            self.reString = r"\b%s\b" % self.reString
-        self.re = re.compile(self.reString)
-        self.re_match = self.re.match
-
-
-class Regex(Token):
-    r"""Token for matching strings that match a given regular
-    expression. Defined with string specifying the regular expression in
-    a form recognized by the stdlib Python  `re module `_.
-    If the given regex contains named groups (defined using ``(?P...)``),
-    these will be preserved as named parse results.
-
-    If instead of the Python stdlib re module you wish to use a different RE module
-    (such as the `regex` module), you can replace it by either building your
-    Regex object with a compiled RE that was compiled using regex:
-
-    Example::
-
-        realnum = Regex(r"[+-]?\d+\.\d*")
-        date = Regex(r'(?P\d{4})-(?P\d\d?)-(?P\d\d?)')
-        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
-        roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
-
-        # use regex module instead of stdlib re module to construct a Regex using
-        # a compiled regular expression
-        import regex
-        parser = pp.Regex(regex.compile(r'[0-9]'))
-
-    """
-    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
-        """The parameters ``pattern`` and ``flags`` are passed
-        to the ``re.compile()`` function as-is. See the Python
-        `re module `_ module for an
-        explanation of the acceptable patterns and flags.
-        """
-        super(Regex, self).__init__()
-
-        if isinstance(pattern, basestring):
-            if not pattern:
-                warnings.warn("null string passed to Regex; use Empty() instead",
-                              SyntaxWarning, stacklevel=2)
-
-            self.pattern = pattern
-            self.flags = flags
-
-            try:
-                self.re = re.compile(self.pattern, self.flags)
-                self.reString = self.pattern
-            except sre_constants.error:
-                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
-                              SyntaxWarning, stacklevel=2)
-                raise
-
-        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
-            self.re = pattern
-            self.pattern = self.reString = pattern.pattern
-            self.flags = flags
-
-        else:
-            raise TypeError("Regex may only be constructed with a string or a compiled RE object")
-
-        self.re_match = self.re.match
-
-        self.name = _ustr(self)
-        self.errmsg = "Expected " + self.name
-        self.mayIndexError = False
-        self.mayReturnEmpty = self.re_match("") is not None
-        self.asGroupList = asGroupList
-        self.asMatch = asMatch
-        if self.asGroupList:
-            self.parseImpl = self.parseImplAsGroupList
-        if self.asMatch:
-            self.parseImpl = self.parseImplAsMatch
-
-    def parseImpl(self, instring, loc, doActions=True):
-        result = self.re_match(instring, loc)
-        if not result:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        loc = result.end()
-        ret = ParseResults(result.group())
-        d = result.groupdict()
-        if d:
-            for k, v in d.items():
-                ret[k] = v
-        return loc, ret
-
-    def parseImplAsGroupList(self, instring, loc, doActions=True):
-        result = self.re_match(instring, loc)
-        if not result:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        loc = result.end()
-        ret = result.groups()
-        return loc, ret
-
-    def parseImplAsMatch(self, instring, loc, doActions=True):
-        result = self.re_match(instring, loc)
-        if not result:
-            raise ParseException(instring, loc, self.errmsg, self)
-
-        loc = result.end()
-        ret = result
-        return loc, ret
-
-    def __str__(self):
-        try:
-            return super(Regex, self).__str__()
-        except Exception:
-            pass
-
-        if self.strRepr is None:
-            self.strRepr = "Re:(%s)" % repr(self.pattern)
-
-        return self.strRepr
-
-    def sub(self, repl):
-        r"""
-        Return Regex with an attached parse action to transform the parsed
-        result as if called using `re.sub(expr, repl, string) `_.
-
-        Example::
-
-            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2")
-            print(make_html.transformString("h1:main title:"))
-            # prints "

main title

" - """ - if self.asGroupList: - warnings.warn("cannot use sub() with Regex(asGroupList=True)", - SyntaxWarning, stacklevel=2) - raise SyntaxError() - - if self.asMatch and callable(repl): - warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)", - SyntaxWarning, stacklevel=2) - raise SyntaxError() - - if self.asMatch: - def pa(tokens): - return tokens[0].expand(repl) - else: - def pa(tokens): - return self.re.sub(repl, tokens[0]) - return self.addParseAction(pa) - -class QuotedString(Token): - r""" - Token for matching strings that are delimited by quoting characters. - - Defined with the following parameters: - - - quoteChar - string of one or more characters defining the - quote delimiting string - - escChar - character to escape quotes, typically backslash - (default= ``None``) - - escQuote - special quote sequence to escape an embedded quote - string (such as SQL's ``""`` to escape an embedded ``"``) - (default= ``None``) - - multiline - boolean indicating whether quotes can span - multiple lines (default= ``False``) - - unquoteResults - boolean indicating whether the matched text - should be unquoted (default= ``True``) - - endQuoteChar - string of one or more characters defining the - end of the quote delimited string (default= ``None`` => same as - quoteChar) - - convertWhitespaceEscapes - convert escaped whitespace - (``'\t'``, ``'\n'``, etc.) to actual whitespace - (default= ``True``) - - Example:: - - qs = QuotedString('"') - print(qs.searchString('lsjdf "This is the quote" sldjf')) - complex_qs = QuotedString('{{', endQuoteChar='}}') - print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) - sql_qs = QuotedString('"', escQuote='""') - print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) - - prints:: - - [['This is the quote']] - [['This is the "quote"']] - [['This is the quote with "embedded" quotes']] - """ - def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, - unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): - super(QuotedString, self).__init__() - - # remove white space from quote chars - wont work anyway - quoteChar = quoteChar.strip() - if not quoteChar: - warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) - raise SyntaxError() - - if endQuoteChar is None: - endQuoteChar = quoteChar - else: - endQuoteChar = endQuoteChar.strip() - if not endQuoteChar: - warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) - raise SyntaxError() - - self.quoteChar = quoteChar - self.quoteCharLen = len(quoteChar) - self.firstQuoteChar = quoteChar[0] - self.endQuoteChar = endQuoteChar - self.endQuoteCharLen = len(endQuoteChar) - self.escChar = escChar - self.escQuote = escQuote - self.unquoteResults = unquoteResults - self.convertWhitespaceEscapes = convertWhitespaceEscapes - - if multiline: - self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'%s(?:[^%s%s]' % (re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '')) - else: - self.flags = 0 - self.pattern = r'%s(?:[^%s\n\r%s]' % (re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '')) - if len(self.endQuoteChar) > 1: - self.pattern += ( - '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) - for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')') - - if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) - if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar) + "(.)" - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - self.re_match = self.re.match - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, stacklevel=2) - raise - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): - result = instring[loc] == self.firstQuoteChar and self.re_match(instring, loc) or None - if not result: - raise ParseException(instring, loc, self.errmsg, self) - - loc = result.end() - ret = result.group() - - if self.unquoteResults: - - # strip off quotes - ret = ret[self.quoteCharLen: -self.endQuoteCharLen] - - if isinstance(ret, basestring): - # replace escaped whitespace - if '\\' in ret and self.convertWhitespaceEscapes: - ws_map = { - r'\t': '\t', - r'\n': '\n', - r'\f': '\f', - r'\r': '\r', - } - for wslit, wschar in ws_map.items(): - ret = ret.replace(wslit, wschar) - - # replace escaped characters - if self.escChar: - ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) - - # replace escaped quotes - if self.escQuote: - ret = ret.replace(self.escQuote, self.endQuoteChar) - - return loc, ret - - def __str__(self): - try: - return super(QuotedString, self).__str__() - except Exception: - pass - - if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) - - return self.strRepr - - -class CharsNotIn(Token): - """Token for matching words composed of characters *not* in a given - set (will include whitespace in matched characters if not listed in - the provided exclusion set - see example). Defined with string - containing all disallowed characters, and an optional minimum, - maximum, and/or exact length. The default value for ``min`` is - 1 (a minimum value < 1 is not valid); the default values for - ``max`` and ``exact`` are 0, meaning no maximum or exact - length restriction. - - Example:: - - # define a comma-separated-value as anything that is not a ',' - csv_value = CharsNotIn(',') - print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213")) - - prints:: - - ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] - """ - def __init__(self, notChars, min=1, max=0, exact=0): - super(CharsNotIn, self).__init__() - self.skipWhitespace = False - self.notChars = notChars - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use " - "Optional(CharsNotIn()) if zero-length char group is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = (self.minLen == 0) - self.mayIndexError = False - - def parseImpl(self, instring, loc, doActions=True): - if instring[loc] in self.notChars: - raise ParseException(instring, loc, self.errmsg, self) - - start = loc - loc += 1 - notchars = self.notChars - maxlen = min(start + self.maxLen, len(instring)) - while loc < maxlen and instring[loc] not in notchars: - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - def __str__(self): - try: - return super(CharsNotIn, self).__str__() - except Exception: - pass - - if self.strRepr is None: - if len(self.notChars) > 4: - self.strRepr = "!W:(%s...)" % self.notChars[:4] - else: - self.strRepr = "!W:(%s)" % self.notChars - - return self.strRepr - -class White(Token): - """Special matching class for matching whitespace. Normally, - whitespace is ignored by pyparsing grammars. This class is included - when some whitespace structures are significant. Define with - a string containing the whitespace characters to be matched; default - is ``" \\t\\r\\n"``. Also takes optional ``min``, - ``max``, and ``exact`` arguments, as defined for the - :class:`Word` class. - """ - whiteStrs = { - ' ' : '', - '\t': '', - '\n': '', - '\r': '', - '\f': '', - u'\u00A0': '', - u'\u1680': '', - u'\u180E': '', - u'\u2000': '', - u'\u2001': '', - u'\u2002': '', - u'\u2003': '', - u'\u2004': '', - u'\u2005': '', - u'\u2006': '', - u'\u2007': '', - u'\u2008': '', - u'\u2009': '', - u'\u200A': '', - u'\u200B': '', - u'\u202F': '', - u'\u205F': '', - u'\u3000': '', - } - def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): - super(White, self).__init__() - self.matchWhite = ws - self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite)) - # ~ self.leaveWhitespace() - self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) - self.mayReturnEmpty = True - self.errmsg = "Expected " + self.name - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - def parseImpl(self, instring, loc, doActions=True): - if instring[loc] not in self.matchWhite: - raise ParseException(instring, loc, self.errmsg, self) - start = loc - loc += 1 - maxloc = start + self.maxLen - maxloc = min(maxloc, len(instring)) - while loc < maxloc and instring[loc] in self.matchWhite: - loc += 1 - - if loc - start < self.minLen: - raise ParseException(instring, loc, self.errmsg, self) - - return loc, instring[start:loc] - - -class _PositionToken(Token): - def __init__(self): - super(_PositionToken, self).__init__() - self.name = self.__class__.__name__ - self.mayReturnEmpty = True - self.mayIndexError = False - -class GoToColumn(_PositionToken): - """Token to advance to a specific column of input text; useful for - tabular report scraping. - """ - def __init__(self, colno): - super(GoToColumn, self).__init__() - self.col = colno - - def preParse(self, instring, loc): - if col(loc, instring) != self.col: - instrlen = len(instring) - if self.ignoreExprs: - loc = self._skipIgnorables(instring, loc) - while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col: - loc += 1 - return loc - - def parseImpl(self, instring, loc, doActions=True): - thiscol = col(loc, instring) - if thiscol > self.col: - raise ParseException(instring, loc, "Text not in expected column", self) - newloc = loc + self.col - thiscol - ret = instring[loc: newloc] - return newloc, ret - - -class LineStart(_PositionToken): - r"""Matches if current position is at the beginning of a line within - the parse string - - Example:: - - test = '''\ - AAA this line - AAA and this line - AAA but not this one - B AAA and definitely not this one - ''' - - for t in (LineStart() + 'AAA' + restOfLine).searchString(test): - print(t) - - prints:: - - ['AAA', ' this line'] - ['AAA', ' and this line'] - - """ - def __init__(self): - super(LineStart, self).__init__() - self.errmsg = "Expected start of line" - - def parseImpl(self, instring, loc, doActions=True): - if col(loc, instring) == 1: - return loc, [] - raise ParseException(instring, loc, self.errmsg, self) - -class LineEnd(_PositionToken): - """Matches if current position is at the end of a line within the - parse string - """ - def __init__(self): - super(LineEnd, self).__init__() - self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")) - self.errmsg = "Expected end of line" - - def parseImpl(self, instring, loc, doActions=True): - if loc < len(instring): - if instring[loc] == "\n": - return loc + 1, "\n" - else: - raise ParseException(instring, loc, self.errmsg, self) - elif loc == len(instring): - return loc + 1, [] - else: - raise ParseException(instring, loc, self.errmsg, self) - -class StringStart(_PositionToken): - """Matches if current position is at the beginning of the parse - string - """ - def __init__(self): - super(StringStart, self).__init__() - self.errmsg = "Expected start of text" - - def parseImpl(self, instring, loc, doActions=True): - if loc != 0: - # see if entire string up to here is just whitespace and ignoreables - if loc != self.preParse(instring, 0): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - -class StringEnd(_PositionToken): - """Matches if current position is at the end of the parse string - """ - def __init__(self): - super(StringEnd, self).__init__() - self.errmsg = "Expected end of text" - - def parseImpl(self, instring, loc, doActions=True): - if loc < len(instring): - raise ParseException(instring, loc, self.errmsg, self) - elif loc == len(instring): - return loc + 1, [] - elif loc > len(instring): - return loc, [] - else: - raise ParseException(instring, loc, self.errmsg, self) - -class WordStart(_PositionToken): - """Matches if the current position is at the beginning of a Word, - and is not preceded by any character in a given set of - ``wordChars`` (default= ``printables``). To emulate the - ``\b`` behavior of regular expressions, use - ``WordStart(alphanums)``. ``WordStart`` will also match at - the beginning of the string being parsed, or at the beginning of - a line. - """ - def __init__(self, wordChars=printables): - super(WordStart, self).__init__() - self.wordChars = set(wordChars) - self.errmsg = "Not at the start of a word" - - def parseImpl(self, instring, loc, doActions=True): - if loc != 0: - if (instring[loc - 1] in self.wordChars - or instring[loc] not in self.wordChars): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - -class WordEnd(_PositionToken): - """Matches if the current position is at the end of a Word, and is - not followed by any character in a given set of ``wordChars`` - (default= ``printables``). To emulate the ``\b`` behavior of - regular expressions, use ``WordEnd(alphanums)``. ``WordEnd`` - will also match at the end of the string being parsed, or at the end - of a line. - """ - def __init__(self, wordChars=printables): - super(WordEnd, self).__init__() - self.wordChars = set(wordChars) - self.skipWhitespace = False - self.errmsg = "Not at the end of a word" - - def parseImpl(self, instring, loc, doActions=True): - instrlen = len(instring) - if instrlen > 0 and loc < instrlen: - if (instring[loc] in self.wordChars or - instring[loc - 1] not in self.wordChars): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - -class ParseExpression(ParserElement): - """Abstract subclass of ParserElement, for combining and - post-processing parsed tokens. - """ - def __init__(self, exprs, savelist=False): - super(ParseExpression, self).__init__(savelist) - if isinstance(exprs, _generatorType): - exprs = list(exprs) - - if isinstance(exprs, basestring): - self.exprs = [self._literalStringClass(exprs)] - elif isinstance(exprs, ParserElement): - self.exprs = [exprs] - elif isinstance(exprs, Iterable): - exprs = list(exprs) - # if sequence of strings provided, wrap with Literal - if any(isinstance(expr, basestring) for expr in exprs): - exprs = (self._literalStringClass(e) if isinstance(e, basestring) else e for e in exprs) - self.exprs = list(exprs) - else: - try: - self.exprs = list(exprs) - except TypeError: - self.exprs = [exprs] - self.callPreparse = False - - def append(self, other): - self.exprs.append(other) - self.strRepr = None - return self - - def leaveWhitespace(self): - """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on - all contained expressions.""" - self.skipWhitespace = False - self.exprs = [e.copy() for e in self.exprs] - for e in self.exprs: - e.leaveWhitespace() - return self - - def ignore(self, other): - if isinstance(other, Suppress): - if other not in self.ignoreExprs: - super(ParseExpression, self).ignore(other) - for e in self.exprs: - e.ignore(self.ignoreExprs[-1]) - else: - super(ParseExpression, self).ignore(other) - for e in self.exprs: - e.ignore(self.ignoreExprs[-1]) - return self - - def __str__(self): - try: - return super(ParseExpression, self).__str__() - except Exception: - pass - - if self.strRepr is None: - self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs)) - return self.strRepr - - def streamline(self): - super(ParseExpression, self).streamline() - - for e in self.exprs: - e.streamline() - - # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d) - # but only if there are no parse actions or resultsNames on the nested And's - # (likewise for Or's and MatchFirst's) - if len(self.exprs) == 2: - other = self.exprs[0] - if (isinstance(other, self.__class__) - and not other.parseAction - and other.resultsName is None - and not other.debug): - self.exprs = other.exprs[:] + [self.exprs[1]] - self.strRepr = None - self.mayReturnEmpty |= other.mayReturnEmpty - self.mayIndexError |= other.mayIndexError - - other = self.exprs[-1] - if (isinstance(other, self.__class__) - and not other.parseAction - and other.resultsName is None - and not other.debug): - self.exprs = self.exprs[:-1] + other.exprs[:] - self.strRepr = None - self.mayReturnEmpty |= other.mayReturnEmpty - self.mayIndexError |= other.mayIndexError - - self.errmsg = "Expected " + _ustr(self) - - return self - - def validate(self, validateTrace=None): - tmp = (validateTrace if validateTrace is not None else [])[:] + [self] - for e in self.exprs: - e.validate(tmp) - self.checkRecursion([]) - - def copy(self): - ret = super(ParseExpression, self).copy() - ret.exprs = [e.copy() for e in self.exprs] - return ret - - def _setResultsName(self, name, listAllMatches=False): - if __diag__.warn_ungrouped_named_tokens_in_collection: - for e in self.exprs: - if isinstance(e, ParserElement) and e.resultsName: - warnings.warn("{0}: setting results name {1!r} on {2} expression " - "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", - name, - type(self).__name__, - e.resultsName), - stacklevel=3) - - return super(ParseExpression, self)._setResultsName(name, listAllMatches) - - -class And(ParseExpression): - """ - Requires all given :class:`ParseExpression` s to be found in the given order. - Expressions may be separated by whitespace. - May be constructed using the ``'+'`` operator. - May also be constructed using the ``'-'`` operator, which will - suppress backtracking. - - Example:: - - integer = Word(nums) - name_expr = OneOrMore(Word(alphas)) - - expr = And([integer("id"), name_expr("name"), integer("age")]) - # more easily written as: - expr = integer("id") + name_expr("name") + integer("age") - """ - - class _ErrorStop(Empty): - def __init__(self, *args, **kwargs): - super(And._ErrorStop, self).__init__(*args, **kwargs) - self.name = '-' - self.leaveWhitespace() - - def __init__(self, exprs, savelist=True): - exprs = list(exprs) - if exprs and Ellipsis in exprs: - tmp = [] - for i, expr in enumerate(exprs): - if expr is Ellipsis: - if i < len(exprs) - 1: - skipto_arg = (Empty() + exprs[i + 1]).exprs[-1] - tmp.append(SkipTo(skipto_arg)("_skipped*")) - else: - raise Exception("cannot construct And with sequence ending in ...") - else: - tmp.append(expr) - exprs[:] = tmp - super(And, self).__init__(exprs, savelist) - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - self.setWhitespaceChars(self.exprs[0].whiteChars) - self.skipWhitespace = self.exprs[0].skipWhitespace - self.callPreparse = True - - def streamline(self): - # collapse any _PendingSkip's - if self.exprs: - if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip) - for e in self.exprs[:-1]): - for i, e in enumerate(self.exprs[:-1]): - if e is None: - continue - if (isinstance(e, ParseExpression) - and e.exprs and isinstance(e.exprs[-1], _PendingSkip)): - e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] - self.exprs[i + 1] = None - self.exprs = [e for e in self.exprs if e is not None] - - super(And, self).streamline() - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - return self - - def parseImpl(self, instring, loc, doActions=True): - # pass False as last arg to _parse for first element, since we already - # pre-parsed the string as part of our And pre-parsing - loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False) - errorStop = False - for e in self.exprs[1:]: - if isinstance(e, And._ErrorStop): - errorStop = True - continue - if errorStop: - try: - loc, exprtokens = e._parse(instring, loc, doActions) - except ParseSyntaxException: - raise - except ParseBaseException as pe: - pe.__traceback__ = None - raise ParseSyntaxException._from_exception(pe) - except IndexError: - raise ParseSyntaxException(instring, len(instring), self.errmsg, self) - else: - loc, exprtokens = e._parse(instring, loc, doActions) - if exprtokens or exprtokens.haskeys(): - resultlist += exprtokens - return loc, resultlist - - def __iadd__(self, other): - if isinstance(other, basestring): - other = self._literalStringClass(other) - return self.append(other) # And([self, other]) - - def checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] - for e in self.exprs: - e.checkRecursion(subRecCheckList) - if not e.mayReturnEmpty: - break - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - -class Or(ParseExpression): - """Requires that at least one :class:`ParseExpression` is found. If - two expressions match, the expression that matches the longest - string will be used. May be constructed using the ``'^'`` - operator. - - Example:: - - # construct Or using '^' operator - - number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) - print(number.searchString("123 3.1416 789")) - - prints:: - - [['123'], ['3.1416'], ['789']] - """ - def __init__(self, exprs, savelist=False): - super(Or, self).__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - else: - self.mayReturnEmpty = True - - def streamline(self): - super(Or, self).streamline() - if __compat__.collect_all_And_tokens: - self.saveAsList = any(e.saveAsList for e in self.exprs) - return self - - def parseImpl(self, instring, loc, doActions=True): - maxExcLoc = -1 - maxException = None - matches = [] - for e in self.exprs: - try: - loc2 = e.tryParse(instring, loc) - except ParseException as err: - err.__traceback__ = None - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring, len(instring), e.errmsg, self) - maxExcLoc = len(instring) - else: - # save match among all matches, to retry longest to shortest - matches.append((loc2, e)) - - if matches: - # re-evaluate all matches in descending order of length of match, in case attached actions - # might change whether or how much they match of the input. - matches.sort(key=itemgetter(0), reverse=True) - - if not doActions: - # no further conditions or parse actions to change the selection of - # alternative, so the first match will be the best match - best_expr = matches[0][1] - return best_expr._parse(instring, loc, doActions) - - longest = -1, None - for loc1, expr1 in matches: - if loc1 <= longest[0]: - # already have a longer match than this one will deliver, we are done - return longest - - try: - loc2, toks = expr1._parse(instring, loc, doActions) - except ParseException as err: - err.__traceback__ = None - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - else: - if loc2 >= loc1: - return loc2, toks - # didn't match as much as before - elif loc2 > longest[0]: - longest = loc2, toks - - if longest != (-1, None): - return longest - - if maxException is not None: - maxException.msg = self.errmsg - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - - def __ixor__(self, other): - if isinstance(other, basestring): - other = self._literalStringClass(other) - return self.append(other) # Or([self, other]) - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] - for e in self.exprs: - e.checkRecursion(subRecCheckList) - - def _setResultsName(self, name, listAllMatches=False): - if (not __compat__.collect_all_And_tokens - and __diag__.warn_multiple_tokens_in_named_alternation): - if any(isinstance(e, And) for e in self.exprs): - warnings.warn("{0}: setting results name {1!r} on {2} expression " - "may only return a single token for an And alternative, " - "in future will return the full list of tokens".format( - "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), - stacklevel=3) - - return super(Or, self)._setResultsName(name, listAllMatches) - - -class MatchFirst(ParseExpression): - """Requires that at least one :class:`ParseExpression` is found. If - two expressions match, the first one listed is the one that will - match. May be constructed using the ``'|'`` operator. - - Example:: - - # construct MatchFirst using '|' operator - - # watch the order of expressions to match - number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) - print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] - - # put more selective expression first - number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) - print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] - """ - def __init__(self, exprs, savelist=False): - super(MatchFirst, self).__init__(exprs, savelist) - if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) - else: - self.mayReturnEmpty = True - - def streamline(self): - super(MatchFirst, self).streamline() - if __compat__.collect_all_And_tokens: - self.saveAsList = any(e.saveAsList for e in self.exprs) - return self - - def parseImpl(self, instring, loc, doActions=True): - maxExcLoc = -1 - maxException = None - for e in self.exprs: - try: - ret = e._parse(instring, loc, doActions) - return ret - except ParseException as err: - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring, len(instring), e.errmsg, self) - maxExcLoc = len(instring) - - # only got here if no expression matched, raise exception for match that made it the furthest - else: - if maxException is not None: - maxException.msg = self.errmsg - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ior__(self, other): - if isinstance(other, basestring): - other = self._literalStringClass(other) - return self.append(other) # MatchFirst([self, other]) - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] - for e in self.exprs: - e.checkRecursion(subRecCheckList) - - def _setResultsName(self, name, listAllMatches=False): - if (not __compat__.collect_all_And_tokens - and __diag__.warn_multiple_tokens_in_named_alternation): - if any(isinstance(e, And) for e in self.exprs): - warnings.warn("{0}: setting results name {1!r} on {2} expression " - "may only return a single token for an And alternative, " - "in future will return the full list of tokens".format( - "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), - stacklevel=3) - - return super(MatchFirst, self)._setResultsName(name, listAllMatches) - - -class Each(ParseExpression): - """Requires all given :class:`ParseExpression` s to be found, but in - any order. Expressions may be separated by whitespace. - - May be constructed using the ``'&'`` operator. - - Example:: - - color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") - shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") - integer = Word(nums) - shape_attr = "shape:" + shape_type("shape") - posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") - color_attr = "color:" + color("color") - size_attr = "size:" + integer("size") - - # use Each (using operator '&') to accept attributes in any order - # (shape and posn are required, color and size are optional) - shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) - - shape_spec.runTests(''' - shape: SQUARE color: BLACK posn: 100, 120 - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - color:GREEN size:20 shape:TRIANGLE posn:20,40 - ''' - ) - - prints:: - - shape: SQUARE color: BLACK posn: 100, 120 - ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] - - color: BLACK - - posn: ['100', ',', '120'] - - x: 100 - - y: 120 - - shape: SQUARE - - - shape: CIRCLE size: 50 color: BLUE posn: 50,80 - ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] - - color: BLUE - - posn: ['50', ',', '80'] - - x: 50 - - y: 80 - - shape: CIRCLE - - size: 50 - - - color: GREEN size: 20 shape: TRIANGLE posn: 20,40 - ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] - - color: GREEN - - posn: ['20', ',', '40'] - - x: 20 - - y: 40 - - shape: TRIANGLE - - size: 20 - """ - def __init__(self, exprs, savelist=True): - super(Each, self).__init__(exprs, savelist) - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - self.skipWhitespace = True - self.initExprGroups = True - self.saveAsList = True - - def streamline(self): - super(Each, self).streamline() - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) - return self - - def parseImpl(self, instring, loc, doActions=True): - if self.initExprGroups: - self.opt1map = dict((id(e.expr), e) for e in self.exprs if isinstance(e, Optional)) - opt1 = [e.expr for e in self.exprs if isinstance(e, Optional)] - opt2 = [e for e in self.exprs if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))] - self.optionals = opt1 + opt2 - self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)] - self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)] - self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))] - self.required += self.multirequired - self.initExprGroups = False - tmpLoc = loc - tmpReqd = self.required[:] - tmpOpt = self.optionals[:] - matchOrder = [] - - keepMatching = True - while keepMatching: - tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired - failed = [] - for e in tmpExprs: - try: - tmpLoc = e.tryParse(instring, tmpLoc) - except ParseException: - failed.append(e) - else: - matchOrder.append(self.opt1map.get(id(e), e)) - if e in tmpReqd: - tmpReqd.remove(e) - elif e in tmpOpt: - tmpOpt.remove(e) - if len(failed) == len(tmpExprs): - keepMatching = False - - if tmpReqd: - missing = ", ".join(_ustr(e) for e in tmpReqd) - raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing) - - # add any unmatched Optionals, in case they have default values defined - matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt] - - resultlist = [] - for e in matchOrder: - loc, results = e._parse(instring, loc, doActions) - resultlist.append(results) - - finalResults = sum(resultlist, ParseResults([])) - return loc, finalResults - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" - - return self.strRepr - - def checkRecursion(self, parseElementList): - subRecCheckList = parseElementList[:] + [self] - for e in self.exprs: - e.checkRecursion(subRecCheckList) - - -class ParseElementEnhance(ParserElement): - """Abstract subclass of :class:`ParserElement`, for combining and - post-processing parsed tokens. - """ - def __init__(self, expr, savelist=False): - super(ParseElementEnhance, self).__init__(savelist) - if isinstance(expr, basestring): - if issubclass(self._literalStringClass, Token): - expr = self._literalStringClass(expr) - else: - expr = self._literalStringClass(Literal(expr)) - self.expr = expr - self.strRepr = None - if expr is not None: - self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars(expr.whiteChars) - self.skipWhitespace = expr.skipWhitespace - self.saveAsList = expr.saveAsList - self.callPreparse = expr.callPreparse - self.ignoreExprs.extend(expr.ignoreExprs) - - def parseImpl(self, instring, loc, doActions=True): - if self.expr is not None: - return self.expr._parse(instring, loc, doActions, callPreParse=False) - else: - raise ParseException("", loc, self.errmsg, self) - - def leaveWhitespace(self): - self.skipWhitespace = False - self.expr = self.expr.copy() - if self.expr is not None: - self.expr.leaveWhitespace() - return self - - def ignore(self, other): - if isinstance(other, Suppress): - if other not in self.ignoreExprs: - super(ParseElementEnhance, self).ignore(other) - if self.expr is not None: - self.expr.ignore(self.ignoreExprs[-1]) - else: - super(ParseElementEnhance, self).ignore(other) - if self.expr is not None: - self.expr.ignore(self.ignoreExprs[-1]) - return self - - def streamline(self): - super(ParseElementEnhance, self).streamline() - if self.expr is not None: - self.expr.streamline() - return self - - def checkRecursion(self, parseElementList): - if self in parseElementList: - raise RecursiveGrammarException(parseElementList + [self]) - subRecCheckList = parseElementList[:] + [self] - if self.expr is not None: - self.expr.checkRecursion(subRecCheckList) - - def validate(self, validateTrace=None): - if validateTrace is None: - validateTrace = [] - tmp = validateTrace[:] + [self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion([]) - - def __str__(self): - try: - return super(ParseElementEnhance, self).__str__() - except Exception: - pass - - if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr)) - return self.strRepr - - -class FollowedBy(ParseElementEnhance): - """Lookahead matching of the given parse expression. - ``FollowedBy`` does *not* advance the parsing position within - the input string, it only verifies that the specified parse - expression matches at the current position. ``FollowedBy`` - always returns a null token list. If any results names are defined - in the lookahead expression, those *will* be returned for access by - name. - - Example:: - - # use FollowedBy to match a label only if it is followed by a ':' - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint() - - prints:: - - [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] - """ - def __init__(self, expr): - super(FollowedBy, self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): - # by using self._expr.parse and deleting the contents of the returned ParseResults list - # we keep any named results that were defined in the FollowedBy expression - _, ret = self.expr._parse(instring, loc, doActions=doActions) - del ret[:] - - return loc, ret - - -class PrecededBy(ParseElementEnhance): - """Lookbehind matching of the given parse expression. - ``PrecededBy`` does not advance the parsing position within the - input string, it only verifies that the specified parse expression - matches prior to the current position. ``PrecededBy`` always - returns a null token list, but if a results name is defined on the - given expression, it is returned. - - Parameters: - - - expr - expression that must match prior to the current parse - location - - retreat - (default= ``None``) - (int) maximum number of characters - to lookbehind prior to the current parse location - - If the lookbehind expression is a string, Literal, Keyword, or - a Word or CharsNotIn with a specified exact or maximum length, then - the retreat parameter is not required. Otherwise, retreat must be - specified to give a maximum number of characters to look back from - the current parse position for a lookbehind match. - - Example:: - - # VB-style variable names with type prefixes - int_var = PrecededBy("#") + pyparsing_common.identifier - str_var = PrecededBy("$") + pyparsing_common.identifier - - """ - def __init__(self, expr, retreat=None): - super(PrecededBy, self).__init__(expr) - self.expr = self.expr().leaveWhitespace() - self.mayReturnEmpty = True - self.mayIndexError = False - self.exact = False - if isinstance(expr, str): - retreat = len(expr) - self.exact = True - elif isinstance(expr, (Literal, Keyword)): - retreat = expr.matchLen - self.exact = True - elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT: - retreat = expr.maxLen - self.exact = True - elif isinstance(expr, _PositionToken): - retreat = 0 - self.exact = True - self.retreat = retreat - self.errmsg = "not preceded by " + str(expr) - self.skipWhitespace = False - self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None))) - - def parseImpl(self, instring, loc=0, doActions=True): - if self.exact: - if loc < self.retreat: - raise ParseException(instring, loc, self.errmsg) - start = loc - self.retreat - _, ret = self.expr._parse(instring, start) - else: - # retreat specified a maximum lookbehind window, iterate - test_expr = self.expr + StringEnd() - instring_slice = instring[max(0, loc - self.retreat):loc] - last_expr = ParseException(instring, loc, self.errmsg) - for offset in range(1, min(loc, self.retreat + 1)+1): - try: - # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:])) - _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset) - except ParseBaseException as pbe: - last_expr = pbe - else: - break - else: - raise last_expr - return loc, ret - - -class NotAny(ParseElementEnhance): - """Lookahead to disallow matching with the given parse expression. - ``NotAny`` does *not* advance the parsing position within the - input string, it only verifies that the specified parse expression - does *not* match at the current position. Also, ``NotAny`` does - *not* skip over leading whitespace. ``NotAny`` always returns - a null token list. May be constructed using the '~' operator. - - Example:: - - AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split()) - - # take care not to mistake keywords for identifiers - ident = ~(AND | OR | NOT) + Word(alphas) - boolean_term = Optional(NOT) + ident - - # very crude boolean expression - to support parenthesis groups and - # operation hierarchy, use infixNotation - boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term) - - # integers that are followed by "." are actually floats - integer = Word(nums) + ~Char(".") - """ - def __init__(self, expr): - super(NotAny, self).__init__(expr) - # ~ self.leaveWhitespace() - self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs - self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, " + _ustr(self.expr) - - def parseImpl(self, instring, loc, doActions=True): - if self.expr.canParseNext(instring, loc): - raise ParseException(instring, loc, self.errmsg, self) - return loc, [] - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "~{" + _ustr(self.expr) + "}" - - return self.strRepr - -class _MultipleMatch(ParseElementEnhance): - def __init__(self, expr, stopOn=None): - super(_MultipleMatch, self).__init__(expr) - self.saveAsList = True - ender = stopOn - if isinstance(ender, basestring): - ender = self._literalStringClass(ender) - self.stopOn(ender) - - def stopOn(self, ender): - if isinstance(ender, basestring): - ender = self._literalStringClass(ender) - self.not_ender = ~ender if ender is not None else None - return self - - def parseImpl(self, instring, loc, doActions=True): - self_expr_parse = self.expr._parse - self_skip_ignorables = self._skipIgnorables - check_ender = self.not_ender is not None - if check_ender: - try_not_ender = self.not_ender.tryParse - - # must be at least one (but first see if we are the stopOn sentinel; - # if so, fail) - if check_ender: - try_not_ender(instring, loc) - loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False) - try: - hasIgnoreExprs = (not not self.ignoreExprs) - while 1: - if check_ender: - try_not_ender(instring, loc) - if hasIgnoreExprs: - preloc = self_skip_ignorables(instring, loc) - else: - preloc = loc - loc, tmptokens = self_expr_parse(instring, preloc, doActions) - if tmptokens or tmptokens.haskeys(): - tokens += tmptokens - except (ParseException, IndexError): - pass - - return loc, tokens - - def _setResultsName(self, name, listAllMatches=False): - if __diag__.warn_ungrouped_named_tokens_in_collection: - for e in [self.expr] + getattr(self.expr, 'exprs', []): - if isinstance(e, ParserElement) and e.resultsName: - warnings.warn("{0}: setting results name {1!r} on {2} expression " - "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", - name, - type(self).__name__, - e.resultsName), - stacklevel=3) - - return super(_MultipleMatch, self)._setResultsName(name, listAllMatches) - - -class OneOrMore(_MultipleMatch): - """Repetition of one or more of the given expression. - - Parameters: - - expr - expression that must match one or more times - - stopOn - (default= ``None``) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example:: - - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: BLACK" - OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] - - # use stopOn attribute for OneOrMore to avoid reading label string as part of the data - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] - - # could also be written as - (attr_expr * (1,)).parseString(text).pprint() - """ - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + _ustr(self.expr) + "}..." - - return self.strRepr - -class ZeroOrMore(_MultipleMatch): - """Optional repetition of zero or more of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - stopOn - (default= ``None``) - expression for a terminating sentinel - (only required if the sentinel would ordinarily match the repetition - expression) - - Example: similar to :class:`OneOrMore` - """ - def __init__(self, expr, stopOn=None): - super(ZeroOrMore, self).__init__(expr, stopOn=stopOn) - self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): - try: - return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) - except (ParseException, IndexError): - return loc, [] - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]..." - - return self.strRepr - - -class _NullToken(object): - def __bool__(self): - return False - __nonzero__ = __bool__ - def __str__(self): - return "" - -class Optional(ParseElementEnhance): - """Optional matching of the given expression. - - Parameters: - - expr - expression that must match zero or more times - - default (optional) - value to be returned if the optional expression is not found. - - Example:: - - # US postal code can be a 5-digit zip, plus optional 4-digit qualifier - zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) - zip.runTests(''' - # traditional ZIP code - 12345 - - # ZIP+4 form - 12101-0001 - - # invalid ZIP - 98765- - ''') - - prints:: - - # traditional ZIP code - 12345 - ['12345'] - - # ZIP+4 form - 12101-0001 - ['12101-0001'] - - # invalid ZIP - 98765- - ^ - FAIL: Expected end of text (at char 5), (line:1, col:6) - """ - __optionalNotMatched = _NullToken() - - def __init__(self, expr, default=__optionalNotMatched): - super(Optional, self).__init__(expr, savelist=False) - self.saveAsList = self.expr.saveAsList - self.defaultValue = default - self.mayReturnEmpty = True - - def parseImpl(self, instring, loc, doActions=True): - try: - loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) - except (ParseException, IndexError): - if self.defaultValue is not self.__optionalNotMatched: - if self.expr.resultsName: - tokens = ParseResults([self.defaultValue]) - tokens[self.expr.resultsName] = self.defaultValue - else: - tokens = [self.defaultValue] - else: - tokens = [] - return loc, tokens - - def __str__(self): - if hasattr(self, "name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]" - - return self.strRepr - -class SkipTo(ParseElementEnhance): - """Token for skipping over all undefined text until the matched - expression is found. - - Parameters: - - expr - target expression marking the end of the data to be skipped - - include - (default= ``False``) if True, the target expression is also parsed - (the skipped text and target expression are returned as a 2-element list). - - ignore - (default= ``None``) used to define grammars (typically quoted strings and - comments) that might contain false matches to the target expression - - failOn - (default= ``None``) define expressions that are not allowed to be - included in the skipped test; if found before the target expression is found, - the SkipTo is not a match - - Example:: - - report = ''' - Outstanding Issues Report - 1 Jan 2000 - - # | Severity | Description | Days Open - -----+----------+-------------------------------------------+----------- - 101 | Critical | Intermittent system crash | 6 - 94 | Cosmetic | Spelling error on Login ('log|n') | 14 - 79 | Minor | System slow when running too many reports | 47 - ''' - integer = Word(nums) - SEP = Suppress('|') - # use SkipTo to simply match everything up until the next SEP - # - ignore quoted strings, so that a '|' character inside a quoted string does not match - # - parse action will call token.strip() for each matched token, i.e., the description body - string_data = SkipTo(SEP, ignore=quotedString) - string_data.setParseAction(tokenMap(str.strip)) - ticket_expr = (integer("issue_num") + SEP - + string_data("sev") + SEP - + string_data("desc") + SEP - + integer("days_open")) - - for tkt in ticket_expr.searchString(report): - print tkt.dump() - - prints:: - - ['101', 'Critical', 'Intermittent system crash', '6'] - - days_open: 6 - - desc: Intermittent system crash - - issue_num: 101 - - sev: Critical - ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] - - days_open: 14 - - desc: Spelling error on Login ('log|n') - - issue_num: 94 - - sev: Cosmetic - ['79', 'Minor', 'System slow when running too many reports', '47'] - - days_open: 47 - - desc: System slow when running too many reports - - issue_num: 79 - - sev: Minor - """ - def __init__(self, other, include=False, ignore=None, failOn=None): - super(SkipTo, self).__init__(other) - self.ignoreExpr = ignore - self.mayReturnEmpty = True - self.mayIndexError = False - self.includeMatch = include - self.saveAsList = False - if isinstance(failOn, basestring): - self.failOn = self._literalStringClass(failOn) - else: - self.failOn = failOn - self.errmsg = "No match found for " + _ustr(self.expr) - - def parseImpl(self, instring, loc, doActions=True): - startloc = loc - instrlen = len(instring) - expr = self.expr - expr_parse = self.expr._parse - self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None - self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None - - tmploc = loc - while tmploc <= instrlen: - if self_failOn_canParseNext is not None: - # break if failOn expression matches - if self_failOn_canParseNext(instring, tmploc): - break - - if self_ignoreExpr_tryParse is not None: - # advance past ignore expressions - while 1: - try: - tmploc = self_ignoreExpr_tryParse(instring, tmploc) - except ParseBaseException: - break - - try: - expr_parse(instring, tmploc, doActions=False, callPreParse=False) - except (ParseException, IndexError): - # no match, advance loc in string - tmploc += 1 - else: - # matched skipto expr, done - break - - else: - # ran off the end of the input string without matching skipto expr, fail - raise ParseException(instring, loc, self.errmsg, self) - - # build up return values - loc = tmploc - skiptext = instring[startloc:loc] - skipresult = ParseResults(skiptext) - - if self.includeMatch: - loc, mat = expr_parse(instring, loc, doActions, callPreParse=False) - skipresult += mat - - return loc, skipresult - -class Forward(ParseElementEnhance): - """Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the ``Forward`` - variable using the '<<' operator. - - Note: take care when assigning to ``Forward`` not to overlook - precedence of operators. - - Specifically, '|' has a lower precedence than '<<', so that:: - - fwdExpr << a | b | c - - will actually be evaluated as:: - - (fwdExpr << a) | b | c - - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the ``Forward``:: - - fwdExpr << (a | b | c) - - Converting to use the '<<=' operator instead will avoid this problem. - - See :class:`ParseResults.pprint` for an example of a recursive - parser created using ``Forward``. - """ - def __init__(self, other=None): - super(Forward, self).__init__(other, savelist=False) - - def __lshift__(self, other): - if isinstance(other, basestring): - other = self._literalStringClass(other) - self.expr = other - self.strRepr = None - self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars(self.expr.whiteChars) - self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList - self.ignoreExprs.extend(self.expr.ignoreExprs) - return self - - def __ilshift__(self, other): - return self << other - - def leaveWhitespace(self): - self.skipWhitespace = False - return self - - def streamline(self): - if not self.streamlined: - self.streamlined = True - if self.expr is not None: - self.expr.streamline() - return self - - def validate(self, validateTrace=None): - if validateTrace is None: - validateTrace = [] - - if self not in validateTrace: - tmp = validateTrace[:] + [self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion([]) - - def __str__(self): - if hasattr(self, "name"): - return self.name - if self.strRepr is not None: - return self.strRepr - - # Avoid infinite recursion by setting a temporary strRepr - self.strRepr = ": ..." - - # Use the string representation of main expression. - retString = '...' - try: - if self.expr is not None: - retString = _ustr(self.expr)[:1000] - else: - retString = "None" - finally: - self.strRepr = self.__class__.__name__ + ": " + retString - return self.strRepr - - def copy(self): - if self.expr is not None: - return super(Forward, self).copy() - else: - ret = Forward() - ret <<= self - return ret - - def _setResultsName(self, name, listAllMatches=False): - if __diag__.warn_name_set_on_empty_Forward: - if self.expr is None: - warnings.warn("{0}: setting results name {0!r} on {1} expression " - "that has no contained expression".format("warn_name_set_on_empty_Forward", - name, - type(self).__name__), - stacklevel=3) - - return super(Forward, self)._setResultsName(name, listAllMatches) - -class TokenConverter(ParseElementEnhance): - """ - Abstract subclass of :class:`ParseExpression`, for converting parsed results. - """ - def __init__(self, expr, savelist=False): - super(TokenConverter, self).__init__(expr) # , savelist) - self.saveAsList = False - -class Combine(TokenConverter): - """Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the - input string; this can be disabled by specifying - ``'adjacent=False'`` in the constructor. - - Example:: - - real = Word(nums) + '.' + Word(nums) - print(real.parseString('3.1416')) # -> ['3', '.', '1416'] - # will also erroneously match the following - print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] - - real = Combine(Word(nums) + '.' + Word(nums)) - print(real.parseString('3.1416')) # -> ['3.1416'] - # no match when there are internal spaces - print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) - """ - def __init__(self, expr, joinString="", adjacent=True): - super(Combine, self).__init__(expr) - # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself - if adjacent: - self.leaveWhitespace() - self.adjacent = adjacent - self.skipWhitespace = True - self.joinString = joinString - self.callPreparse = True - - def ignore(self, other): - if self.adjacent: - ParserElement.ignore(self, other) - else: - super(Combine, self).ignore(other) - return self - - def postParse(self, instring, loc, tokenlist): - retToks = tokenlist.copy() - del retToks[:] - retToks += ParseResults(["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults) - - if self.resultsName and retToks.haskeys(): - return [retToks] - else: - return retToks - -class Group(TokenConverter): - """Converter to return the matched tokens as a list - useful for - returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions. - - Example:: - - ident = Word(alphas) - num = Word(nums) - term = ident | num - func = ident + Optional(delimitedList(term)) - print(func.parseString("fn a, b, 100")) # -> ['fn', 'a', 'b', '100'] - - func = ident + Group(Optional(delimitedList(term))) - print(func.parseString("fn a, b, 100")) # -> ['fn', ['a', 'b', '100']] - """ - def __init__(self, expr): - super(Group, self).__init__(expr) - self.saveAsList = True - - def postParse(self, instring, loc, tokenlist): - return [tokenlist] - -class Dict(TokenConverter): - """Converter to return a repetitive expression as a list, but also - as a dictionary. Each element can also be referenced using the first - token in the expression as its key. Useful for tabular report - scraping when the first column can be used as a item key. - - Example:: - - data_word = Word(alphas) - label = data_word + FollowedBy(':') - attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) - - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - - # print attributes as plain groups - print(OneOrMore(attr_expr).parseString(text).dump()) - - # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names - result = Dict(OneOrMore(Group(attr_expr))).parseString(text) - print(result.dump()) - - # access named fields as dict entries, or output as dict - print(result['shape']) - print(result.asDict()) - - prints:: - - ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: light blue - - posn: upper left - - shape: SQUARE - - texture: burlap - SQUARE - {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} - - See more examples at :class:`ParseResults` of accessing fields by results name. - """ - def __init__(self, expr): - super(Dict, self).__init__(expr) - self.saveAsList = True - - def postParse(self, instring, loc, tokenlist): - for i, tok in enumerate(tokenlist): - if len(tok) == 0: - continue - ikey = tok[0] - if isinstance(ikey, int): - ikey = _ustr(tok[0]).strip() - if len(tok) == 1: - tokenlist[ikey] = _ParseResultsWithOffset("", i) - elif len(tok) == 2 and not isinstance(tok[1], ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) - else: - dictvalue = tok.copy() # ParseResults(i) - del dictvalue[0] - if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys()): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) - else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) - - if self.resultsName: - return [tokenlist] - else: - return tokenlist - - -class Suppress(TokenConverter): - """Converter for ignoring the results of a parsed expression. - - Example:: - - source = "a, b, c,d" - wd = Word(alphas) - wd_list1 = wd + ZeroOrMore(',' + wd) - print(wd_list1.parseString(source)) - - # often, delimiters that are useful during parsing are just in the - # way afterward - use Suppress to keep them out of the parsed output - wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) - print(wd_list2.parseString(source)) - - prints:: - - ['a', ',', 'b', ',', 'c', ',', 'd'] - ['a', 'b', 'c', 'd'] - - (See also :class:`delimitedList`.) - """ - def postParse(self, instring, loc, tokenlist): - return [] - - def suppress(self): - return self - - -class OnlyOnce(object): - """Wrapper for parse actions, to ensure they are only called once. - """ - def __init__(self, methodCall): - self.callable = _trim_arity(methodCall) - self.called = False - def __call__(self, s, l, t): - if not self.called: - results = self.callable(s, l, t) - self.called = True - return results - raise ParseException(s, l, "") - def reset(self): - self.called = False - -def traceParseAction(f): - """Decorator for debugging parse actions. - - When the parse action is called, this decorator will print - ``">> entering method-name(line:, , )"``. - When the parse action completes, the decorator will print - ``"<<"`` followed by the returned value, or any exception that the parse action raised. - - Example:: - - wd = Word(alphas) - - @traceParseAction - def remove_duplicate_chars(tokens): - return ''.join(sorted(set(''.join(tokens)))) - - wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) - print(wds.parseString("slkdjs sld sldd sdlf sdljf")) - - prints:: - - >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) - < 3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc - sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (thisFunc, line(l, s), l, t)) - try: - ret = f(*paArgs) - except Exception as exc: - sys.stderr.write("< ['aa', 'bb', 'cc'] - delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] - """ - dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..." - if combine: - return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName) - else: - return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName) - -def countedArray(expr, intExpr=None): - """Helper to define a counted list of expressions. - - This helper defines a pattern of the form:: - - integer expr expr expr... - - where the leading integer tells how many expr expressions follow. - The matched tokens returns the array of expr tokens as a list - the - leading count token is suppressed. - - If ``intExpr`` is specified, it should be a pyparsing expression - that produces an integer value. - - Example:: - - countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] - - # in this parser, the leading integer value is given in binary, - # '10' indicating that 2 values are in the array - binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) - countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] - """ - arrayExpr = Forward() - def countFieldParseAction(s, l, t): - n = t[0] - arrayExpr << (n and Group(And([expr] * n)) or Group(empty)) - return [] - if intExpr is None: - intExpr = Word(nums).setParseAction(lambda t: int(t[0])) - else: - intExpr = intExpr.copy() - intExpr.setName("arrayLen") - intExpr.addParseAction(countFieldParseAction, callDuringTry=True) - return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...') - -def _flatten(L): - ret = [] - for i in L: - if isinstance(i, list): - ret.extend(_flatten(i)) - else: - ret.append(i) - return ret - -def matchPreviousLiteral(expr): - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks for - a 'repeat' of a previous expression. For example:: - - first = Word(nums) - second = matchPreviousLiteral(first) - matchExpr = first + ":" + second - - will match ``"1:1"``, but not ``"1:2"``. Because this - matches a previous literal, will also match the leading - ``"1:1"`` in ``"1:10"``. If this is not desired, use - :class:`matchPreviousExpr`. Do *not* use with packrat parsing - enabled. - """ - rep = Forward() - def copyTokenToRepeater(s, l, t): - if t: - if len(t) == 1: - rep << t[0] - else: - # flatten t tokens - tflat = _flatten(t.asList()) - rep << And(Literal(tt) for tt in tflat) - else: - rep << Empty() - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def matchPreviousExpr(expr): - """Helper to define an expression that is indirectly defined from - the tokens matched in a previous expression, that is, it looks for - a 'repeat' of a previous expression. For example:: - - first = Word(nums) - second = matchPreviousExpr(first) - matchExpr = first + ":" + second - - will match ``"1:1"``, but not ``"1:2"``. Because this - matches by expressions, will *not* match the leading ``"1:1"`` - in ``"1:10"``; the expressions are evaluated first, and then - compared, so ``"1"`` is compared with ``"10"``. Do *not* use - with packrat parsing enabled. - """ - rep = Forward() - e2 = expr.copy() - rep <<= e2 - def copyTokenToRepeater(s, l, t): - matchTokens = _flatten(t.asList()) - def mustMatchTheseTokens(s, l, t): - theseTokens = _flatten(t.asList()) - if theseTokens != matchTokens: - raise ParseException('', 0, '') - rep.setParseAction(mustMatchTheseTokens, callDuringTry=True) - expr.addParseAction(copyTokenToRepeater, callDuringTry=True) - rep.setName('(prev) ' + _ustr(expr)) - return rep - -def _escapeRegexRangeChars(s): - # ~ escape these chars: ^-[] - for c in r"\^-[]": - s = s.replace(c, _bslash + c) - s = s.replace("\n", r"\n") - s = s.replace("\t", r"\t") - return _ustr(s) - -def oneOf(strs, caseless=False, useRegex=True, asKeyword=False): - """Helper to quickly define a set of alternative Literals, and makes - sure to do longest-first testing when there is a conflict, - regardless of the input order, but returns - a :class:`MatchFirst` for best performance. - - Parameters: - - - strs - a string of space-delimited literals, or a collection of - string literals - - caseless - (default= ``False``) - treat all literals as - caseless - - useRegex - (default= ``True``) - as an optimization, will - generate a Regex object; otherwise, will generate - a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if - creating a :class:`Regex` raises an exception) - - asKeyword - (default=``False``) - enforce Keyword-style matching on the - generated expressions - - Example:: - - comp_oper = oneOf("< = > <= >= !=") - var = Word(alphas) - number = Word(nums) - term = var | number - comparison_expr = term + comp_oper + term - print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) - - prints:: - - [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] - """ - if isinstance(caseless, basestring): - warnings.warn("More than one string argument passed to oneOf, pass " - "choices as a list or space-delimited string", stacklevel=2) - - if caseless: - isequal = (lambda a, b: a.upper() == b.upper()) - masks = (lambda a, b: b.upper().startswith(a.upper())) - parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral - else: - isequal = (lambda a, b: a == b) - masks = (lambda a, b: b.startswith(a)) - parseElementClass = Keyword if asKeyword else Literal - - symbols = [] - if isinstance(strs, basestring): - symbols = strs.split() - elif isinstance(strs, Iterable): - symbols = list(strs) - else: - warnings.warn("Invalid argument to oneOf, expected string or iterable", - SyntaxWarning, stacklevel=2) - if not symbols: - return NoMatch() - - if not asKeyword: - # if not producing keywords, need to reorder to take care to avoid masking - # longer choices with shorter ones - i = 0 - while i < len(symbols) - 1: - cur = symbols[i] - for j, other in enumerate(symbols[i + 1:]): - if isequal(other, cur): - del symbols[i + j + 1] - break - elif masks(cur, other): - del symbols[i + j + 1] - symbols.insert(i, other) - break - else: - i += 1 - - if not (caseless or asKeyword) and useRegex: - # ~ print (strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols])) - try: - if len(symbols) == len("".join(symbols)): - return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols)) - else: - return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols)) - except Exception: - warnings.warn("Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, stacklevel=2) - - # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) - -def dictOf(key, value): - """Helper to easily and clearly define a dictionary by specifying - the respective patterns for the key and value. Takes care of - defining the :class:`Dict`, :class:`ZeroOrMore`, and - :class:`Group` tokens in the proper order. The key pattern - can include delimiting markers or punctuation, as long as they are - suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the :class:`Dict` results - can include named token fields. - - Example:: - - text = "shape: SQUARE posn: upper left color: light blue texture: burlap" - attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) - print(OneOrMore(attr_expr).parseString(text).dump()) - - attr_label = label - attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) - - # similar to Dict, but simpler call format - result = dictOf(attr_label, attr_value).parseString(text) - print(result.dump()) - print(result['shape']) - print(result.shape) # object attribute access works too - print(result.asDict()) - - prints:: - - [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] - - color: light blue - - posn: upper left - - shape: SQUARE - - texture: burlap - SQUARE - SQUARE - {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} - """ - return Dict(OneOrMore(Group(key + value))) - -def originalTextFor(expr, asString=True): - """Helper to return the original, untokenized text for a given - expression. Useful to restore the parsed fields of an HTML start - tag into the raw tag text itself, or to revert separate tokens with - intervening whitespace back to the original matching input text. By - default, returns astring containing the original parsed text. - - If the optional ``asString`` argument is passed as - ``False``, then the return value is - a :class:`ParseResults` containing any results names that - were originally matched, and a single token containing the original - matched text from the input string. So if the expression passed to - :class:`originalTextFor` contains expressions with defined - results names, you must set ``asString`` to ``False`` if you - want to preserve those results name values. - - Example:: - - src = "this is test bold text normal text " - for tag in ("b", "i"): - opener, closer = makeHTMLTags(tag) - patt = originalTextFor(opener + SkipTo(closer) + closer) - print(patt.searchString(src)[0]) - - prints:: - - [' bold text '] - ['text'] - """ - locMarker = Empty().setParseAction(lambda s, loc, t: loc) - endlocMarker = locMarker.copy() - endlocMarker.callPreparse = False - matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") - if asString: - extractText = lambda s, l, t: s[t._original_start: t._original_end] - else: - def extractText(s, l, t): - t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]] - matchExpr.setParseAction(extractText) - matchExpr.ignoreExprs = expr.ignoreExprs - return matchExpr - -def ungroup(expr): - """Helper to undo pyparsing's default grouping of And expressions, - even if all but one are non-empty. - """ - return TokenConverter(expr).addParseAction(lambda t: t[0]) - -def locatedExpr(expr): - """Helper to decorate a returned token with its starting and ending - locations in the input string. - - This helper adds the following results names: - - - locn_start = location where matched expression begins - - locn_end = location where matched expression ends - - value = the actual parsed results - - Be careful if the input text contains ```` characters, you - may want to call :class:`ParserElement.parseWithTabs` - - Example:: - - wd = Word(alphas) - for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): - print(match) - - prints:: - - [[0, 'ljsdf', 5]] - [[8, 'lksdjjf', 15]] - [[18, 'lkkjj', 23]] - """ - locator = Empty().setParseAction(lambda s, l, t: l) - return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) - - -# convenience constants for positional expressions -empty = Empty().setName("empty") -lineStart = LineStart().setName("lineStart") -lineEnd = LineEnd().setName("lineEnd") -stringStart = StringStart().setName("stringStart") -stringEnd = StringEnd().setName("stringEnd") - -_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1]) -_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s, l, t: unichr(int(t[0].lstrip(r'\0x'), 16))) -_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8))) -_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1) -_charRange = Group(_singleChar + Suppress("-") + _singleChar) -_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]" - -def srange(s): - r"""Helper to easily define string ranges for use in Word - construction. Borrows syntax from regexp '[]' string range - definitions:: - - srange("[0-9]") -> "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - - The input string must be enclosed in []'s, and the returned string - is the expanded character set joined into a single string. The - values enclosed in the []'s may be: - - - a single character - - an escaped character with a leading backslash (such as ``\-`` - or ``\]``) - - an escaped hex character with a leading ``'\x'`` - (``\x21``, which is a ``'!'`` character) (``\0x##`` - is also supported for backwards compatibility) - - an escaped octal character with a leading ``'\0'`` - (``\041``, which is a ``'!'`` character) - - a range of any of the above, separated by a dash (``'a-z'``, - etc.) - - any combination of the above (``'aeiouy'``, - ``'a-zA-Z0-9_$'``, etc.) - """ - _expanded = lambda p: p if not isinstance(p, ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) - try: - return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) - except Exception: - return "" - -def matchOnlyAtCol(n): - """Helper method for defining parse actions that require matching at - a specific column in the input text. - """ - def verifyCol(strg, locn, toks): - if col(locn, strg) != n: - raise ParseException(strg, locn, "matched token not at column %d" % n) - return verifyCol - -def replaceWith(replStr): - """Helper method for common parse actions that simply return - a literal value. Especially useful when used with - :class:`transformString` (). - - Example:: - - num = Word(nums).setParseAction(lambda toks: int(toks[0])) - na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) - term = na | num - - OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] - """ - return lambda s, l, t: [replStr] - -def removeQuotes(s, l, t): - """Helper parse action for removing quotation marks from parsed - quoted strings. - - Example:: - - # by default, quotation marks are included in parsed results - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] - - # use removeQuotes to strip quotation marks from parsed results - quotedString.setParseAction(removeQuotes) - quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] - """ - return t[0][1:-1] - -def tokenMap(func, *args): - """Helper to define a parse action by mapping a function to all - elements of a ParseResults list. If any additional args are passed, - they are forwarded to the given function as additional arguments - after the token, as in - ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``, - which will convert the parsed data to an integer using base 16. - - Example (compare the last to example in :class:`ParserElement.transformString`:: - - hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) - hex_ints.runTests(''' - 00 11 22 aa FF 0a 0d 1a - ''') - - upperword = Word(alphas).setParseAction(tokenMap(str.upper)) - OneOrMore(upperword).runTests(''' - my kingdom for a horse - ''') - - wd = Word(alphas).setParseAction(tokenMap(str.title)) - OneOrMore(wd).setParseAction(' '.join).runTests(''' - now is the winter of our discontent made glorious summer by this sun of york - ''') - - prints:: - - 00 11 22 aa FF 0a 0d 1a - [0, 17, 34, 170, 255, 10, 13, 26] - - my kingdom for a horse - ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] - - now is the winter of our discontent made glorious summer by this sun of york - ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] - """ - def pa(s, l, t): - return [func(tokn, *args) for tokn in t] - - try: - func_name = getattr(func, '__name__', - getattr(func, '__class__').__name__) - except Exception: - func_name = str(func) - pa.__name__ = func_name - - return pa - -upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) -"""(Deprecated) Helper parse action to convert tokens to upper case. -Deprecated in favor of :class:`pyparsing_common.upcaseTokens`""" - -downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) -"""(Deprecated) Helper parse action to convert tokens to lower case. -Deprecated in favor of :class:`pyparsing_common.downcaseTokens`""" - -def _makeTags(tagStr, xml, - suppress_LT=Suppress("<"), - suppress_GT=Suppress(">")): - """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr, basestring): - resname = tagStr - tagStr = Keyword(tagStr, caseless=not xml) - else: - resname = tagStr.name - - tagAttrName = Word(alphas, alphanums + "_-:") - if xml: - tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes) - openTag = (suppress_LT - + tagStr("tag") - + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) - + Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/') - + suppress_GT) - else: - tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">") - openTag = (suppress_LT - + tagStr("tag") - + Dict(ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens) - + Optional(Suppress("=") + tagAttrValue)))) - + Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/') - + suppress_GT) - closeTag = Combine(_L("", adjacent=False) - - openTag.setName("<%s>" % resname) - # add start results name in parse action now that ungrouped names are not reported at two levels - openTag.addParseAction(lambda t: t.__setitem__("start" + "".join(resname.replace(":", " ").title().split()), t.copy())) - closeTag = closeTag("end" + "".join(resname.replace(":", " ").title().split())).setName("" % resname) - openTag.tag = resname - closeTag.tag = resname - openTag.tag_body = SkipTo(closeTag()) - return openTag, closeTag - -def makeHTMLTags(tagStr): - """Helper to construct opening and closing tag expressions for HTML, - given a tag name. Matches tags in either upper or lower case, - attributes with namespaces and with quoted or unquoted values. - - Example:: - - text = 'More info at the pyparsing wiki page' - # makeHTMLTags returns pyparsing expressions for the opening and - # closing tags as a 2-tuple - a, a_end = makeHTMLTags("A") - link_expr = a + SkipTo(a_end)("link_text") + a_end - - for link in link_expr.searchString(text): - # attributes in the tag (like "href" shown here) are - # also accessible as named results - print(link.link_text, '->', link.href) - - prints:: - - pyparsing -> https://github.com/pyparsing/pyparsing/wiki - """ - return _makeTags(tagStr, False) - -def makeXMLTags(tagStr): - """Helper to construct opening and closing tag expressions for XML, - given a tag name. Matches tags only in the given upper/lower case. - - Example: similar to :class:`makeHTMLTags` - """ - return _makeTags(tagStr, True) - -def withAttribute(*args, **attrDict): - """Helper to create a validating parse action to be used with start - tags created with :class:`makeXMLTags` or - :class:`makeHTMLTags`. Use ``withAttribute`` to qualify - a starting tag with a required attribute value, to avoid false - matches on common tags such as ```` or ``
``. - - Call ``withAttribute`` with a series of attribute names and - values. Specify the list of filter attributes names and values as: - - - keyword arguments, as in ``(align="right")``, or - - as an explicit dict with ``**`` operator, when an attribute - name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}`` - - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))`` - - For attribute names with a namespace prefix, you must use the second - form. Attribute names are matched insensitive to upper/lower case. - - If just testing for ``class`` (with or without a namespace), use - :class:`withClass`. - - To verify that the attribute exists, but without specifying a value, - pass ``withAttribute.ANY_VALUE`` as the value. - - Example:: - - html = ''' -
- Some text -
1 4 0 1 0
-
1,3 2,3 1,1
-
this has no type
-
- - ''' - div,div_end = makeHTMLTags("div") - - # only match div tag having a type attribute with value "grid" - div_grid = div().setParseAction(withAttribute(type="grid")) - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - # construct a match with any div tag having a type attribute, regardless of the value - div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - - prints:: - - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - if args: - attrs = args[:] - else: - attrs = attrDict.items() - attrs = [(k, v) for k, v in attrs] - def pa(s, l, tokens): - for attrName, attrValue in attrs: - if attrName not in tokens: - raise ParseException(s, l, "no matching attribute " + attrName) - if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" % - (attrName, tokens[attrName], attrValue)) - return pa -withAttribute.ANY_VALUE = object() - -def withClass(classname, namespace=''): - """Simplified version of :class:`withAttribute` when - matching on a div class - made difficult because ``class`` is - a reserved word in Python. - - Example:: - - html = ''' -
- Some text -
1 4 0 1 0
-
1,3 2,3 1,1
-
this <div> has no class
-
- - ''' - div,div_end = makeHTMLTags("div") - div_grid = div().setParseAction(withClass("grid")) - - grid_expr = div_grid + SkipTo(div | div_end)("body") - for grid_header in grid_expr.searchString(html): - print(grid_header.body) - - div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) - div_expr = div_any_type + SkipTo(div | div_end)("body") - for div_header in div_expr.searchString(html): - print(div_header.body) - - prints:: - - 1 4 0 1 0 - - 1 4 0 1 0 - 1,3 2,3 1,1 - """ - classattr = "%s:class" % namespace if namespace else "class" - return withAttribute(**{classattr: classname}) - -opAssoc = SimpleNamespace() -opAssoc.LEFT = object() -opAssoc.RIGHT = object() - -def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')): - """Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary - or binary, left- or right-associative. Parse actions can also be - attached to operator expressions. The generated parser will also - recognize the use of parentheses to override operator precedences - (see example below). - - Note: if you define a deep operator list, you may see performance - issues when using infixNotation. See - :class:`ParserElement.enablePackrat` for a mechanism to potentially - improve your parser performance. - - Parameters: - - baseExpr - expression representing the most basic element for the - nested - - opList - list of tuples, one for each operator precedence level - in the expression grammar; each tuple is of the form ``(opExpr, - numTerms, rightLeftAssoc, parseAction)``, where: - - - opExpr is the pyparsing expression for the operator; may also - be a string, which will be converted to a Literal; if numTerms - is 3, opExpr is a tuple of two expressions, for the two - operators separating the 3 terms - - numTerms is the number of terms for this operator (must be 1, - 2, or 3) - - rightLeftAssoc is the indicator whether the operator is right - or left associative, using the pyparsing-defined constants - ``opAssoc.RIGHT`` and ``opAssoc.LEFT``. - - parseAction is the parse action to be associated with - expressions matching this operator expression (the parse action - tuple member may be omitted); if the parse action is passed - a tuple or list of functions, this is equivalent to calling - ``setParseAction(*fn)`` - (:class:`ParserElement.setParseAction`) - - lpar - expression for matching left-parentheses - (default= ``Suppress('(')``) - - rpar - expression for matching right-parentheses - (default= ``Suppress(')')``) - - Example:: - - # simple example of four-function arithmetic with ints and - # variable names - integer = pyparsing_common.signed_integer - varname = pyparsing_common.identifier - - arith_expr = infixNotation(integer | varname, - [ - ('-', 1, opAssoc.RIGHT), - (oneOf('* /'), 2, opAssoc.LEFT), - (oneOf('+ -'), 2, opAssoc.LEFT), - ]) - - arith_expr.runTests(''' - 5+3*6 - (5+3)*6 - -2--11 - ''', fullDump=False) - - prints:: - - 5+3*6 - [[5, '+', [3, '*', 6]]] - - (5+3)*6 - [[[5, '+', 3], '*', 6]] - - -2--11 - [[['-', 2], '-', ['-', 11]]] - """ - # captive version of FollowedBy that does not do parse actions or capture results names - class _FB(FollowedBy): - def parseImpl(self, instring, loc, doActions=True): - self.expr.tryParse(instring, loc) - return loc, [] - - ret = Forward() - lastExpr = baseExpr | (lpar + ret + rpar) - for i, operDef in enumerate(opList): - opExpr, arity, rightLeftAssoc, pa = (operDef + (None, ))[:4] - termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr - if arity == 3: - if opExpr is None or len(opExpr) != 2: - raise ValueError( - "if numterms=3, opExpr must be a tuple or list of two expressions") - opExpr1, opExpr2 = opExpr - thisExpr = Forward().setName(termName) - if rightLeftAssoc == opAssoc.LEFT: - if arity == 1: - matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr)) - elif arity == 2: - if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr)) - else: - matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr)) - elif arity == 3: - matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) - + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - elif rightLeftAssoc == opAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Optional): - opExpr = Optional(opExpr) - matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) - elif arity == 2: - if opExpr is not None: - matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr)) - else: - matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr)) - elif arity == 3: - matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) - + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - else: - raise ValueError("operator must indicate right or left associativity") - if pa: - if isinstance(pa, (tuple, list)): - matchExpr.setParseAction(*pa) - else: - matchExpr.setParseAction(pa) - thisExpr <<= (matchExpr.setName(termName) | lastExpr) - lastExpr = thisExpr - ret <<= lastExpr - return ret - -operatorPrecedence = infixNotation -"""(Deprecated) Former name of :class:`infixNotation`, will be -dropped in a future release.""" - -dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("string enclosed in double quotes") -sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("string enclosed in single quotes") -quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' - | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") - -def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): - """Helper method for defining nested lists enclosed in opening and - closing delimiters ("(" and ")" are the default). - - Parameters: - - opener - opening character for a nested list - (default= ``"("``); can also be a pyparsing expression - - closer - closing character for a nested list - (default= ``")"``); can also be a pyparsing expression - - content - expression for items within the nested lists - (default= ``None``) - - ignoreExpr - expression for ignoring opening and closing - delimiters (default= :class:`quotedString`) - - If an expression is not provided for the content argument, the - nested expression will capture all whitespace-delimited content - between delimiters as a list of separate values. - - Use the ``ignoreExpr`` argument to define expressions that may - contain opening or closing characters that should not be treated as - opening or closing characters for nesting, such as quotedString or - a comment expression. Specify multiple expressions using an - :class:`Or` or :class:`MatchFirst`. The default is - :class:`quotedString`, but if no expressions are to be ignored, then - pass ``None`` for this argument. - - Example:: - - data_type = oneOf("void int short long char float double") - decl_data_type = Combine(data_type + Optional(Word('*'))) - ident = Word(alphas+'_', alphanums+'_') - number = pyparsing_common.number - arg = Group(decl_data_type + ident) - LPAR, RPAR = map(Suppress, "()") - - code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) - - c_function = (decl_data_type("type") - + ident("name") - + LPAR + Optional(delimitedList(arg), [])("args") + RPAR - + code_body("body")) - c_function.ignore(cStyleComment) - - source_code = ''' - int is_odd(int x) { - return (x%2); - } - - int dec_to_hex(char hchar) { - if (hchar >= '0' && hchar <= '9') { - return (ord(hchar)-ord('0')); - } else { - return (10+ord(hchar)-ord('A')); - } - } - ''' - for func in c_function.searchString(source_code): - print("%(name)s (%(type)s) args: %(args)s" % func) - - - prints:: - - is_odd (int) args: [['int', 'x']] - dec_to_hex (int) args: [['char', 'hchar']] - """ - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener, basestring) and isinstance(closer, basestring): - if len(opener) == 1 and len(closer) == 1: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr - + CharsNotIn(opener - + closer - + ParserElement.DEFAULT_WHITE_CHARS, exact=1) - ) - ).setParseAction(lambda t: t[0].strip())) - else: - content = (empty.copy() + CharsNotIn(opener - + closer - + ParserElement.DEFAULT_WHITE_CHARS - ).setParseAction(lambda t: t[0].strip())) - else: - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr - + ~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) - ).setParseAction(lambda t: t[0].strip())) - else: - content = (Combine(OneOrMore(~Literal(opener) - + ~Literal(closer) - + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) - ).setParseAction(lambda t: t[0].strip())) - else: - raise ValueError("opening and closing arguments must be strings if no content expression is given") - ret = Forward() - if ignoreExpr is not None: - ret <<= Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)) - else: - ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) - ret.setName('nested %s%s expression' % (opener, closer)) - return ret - -def indentedBlock(blockStatementExpr, indentStack, indent=True): - """Helper method for defining space-delimited indentation blocks, - such as those used to define block statements in Python source code. - - Parameters: - - - blockStatementExpr - expression defining syntax of statement that - is repeated within the indented block - - indentStack - list created by caller to manage indentation stack - (multiple statementWithIndentedBlock expressions within a single - grammar should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond - the current level; set to False for block of left-most - statements (default= ``True``) - - A valid block must contain at least one ``blockStatement``. - - Example:: - - data = ''' - def A(z): - A1 - B = 100 - G = A2 - A2 - A3 - B - def BB(a,b,c): - BB1 - def BBA(): - bba1 - bba2 - bba3 - C - D - def spam(x,y): - def eggs(z): - pass - ''' - - - indentStack = [1] - stmt = Forward() - - identifier = Word(alphas, alphanums) - funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":") - func_body = indentedBlock(stmt, indentStack) - funcDef = Group(funcDecl + func_body) - - rvalue = Forward() - funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") - rvalue << (funcCall | identifier | Word(nums)) - assignment = Group(identifier + "=" + rvalue) - stmt << (funcDef | assignment | identifier) - - module_body = OneOrMore(stmt) - - parseTree = module_body.parseString(data) - parseTree.pprint() - - prints:: - - [['def', - 'A', - ['(', 'z', ')'], - ':', - [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], - 'B', - ['def', - 'BB', - ['(', 'a', 'b', 'c', ')'], - ':', - [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], - 'C', - 'D', - ['def', - 'spam', - ['(', 'x', 'y', ')'], - ':', - [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] - """ - backup_stack = indentStack[:] - - def reset_stack(): - indentStack[:] = backup_stack - - def checkPeerIndent(s, l, t): - if l >= len(s): return - curCol = col(l, s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseException(s, l, "illegal nesting") - raise ParseException(s, l, "not a peer entry") - - def checkSubIndent(s, l, t): - curCol = col(l, s) - if curCol > indentStack[-1]: - indentStack.append(curCol) - else: - raise ParseException(s, l, "not a subentry") - - def checkUnindent(s, l, t): - if l >= len(s): return - curCol = col(l, s) - if not(indentStack and curCol in indentStack): - raise ParseException(s, l, "not an unindent") - if curCol < indentStack[-1]: - indentStack.pop() - - NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd()) - INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') - PEER = Empty().setParseAction(checkPeerIndent).setName('') - UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') - if indent: - smExpr = Group(Optional(NL) - + INDENT - + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) - + UNDENT) - else: - smExpr = Group(Optional(NL) - + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) - + UNDENT) - smExpr.setFailAction(lambda a, b, c, d: reset_stack()) - blockStatementExpr.ignore(_bslash + LineEnd()) - return smExpr.setName('indented block') - -alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") -punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") - -anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag')) -_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\'')) -commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") -def replaceHTMLEntity(t): - """Helper parser action to replace common HTML entities with their special characters""" - return _htmlEntityMap.get(t.entity) - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") -"Comment of the form ``/* ... */``" - -htmlComment = Regex(r"").setName("HTML comment") -"Comment of the form ````" - -restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") -dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") -"Comment of the form ``// ... (to end of line)``" - -cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment") -"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`" - -javaStyleComment = cppStyleComment -"Same as :class:`cppStyleComment`" - -pythonStyleComment = Regex(r"#.*").setName("Python style comment") -"Comment of the form ``# ... (to end of line)``" - -_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') - + Optional(Word(" \t") - + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem") -commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList") -"""(Deprecated) Predefined expression of 1 or more printable words or -quoted strings, separated by commas. - -This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`. -""" - -# some other useful expressions - using lower-case class name since we are really using this as a namespace -class pyparsing_common: - """Here are some common low-level expressions that may be useful in - jump-starting parser development: - - - numeric forms (:class:`integers`, :class:`reals`, - :class:`scientific notation`) - - common :class:`programming identifiers` - - network addresses (:class:`MAC`, - :class:`IPv4`, :class:`IPv6`) - - ISO8601 :class:`dates` and - :class:`datetime` - - :class:`UUID` - - :class:`comma-separated list` - - Parse actions: - - - :class:`convertToInteger` - - :class:`convertToFloat` - - :class:`convertToDate` - - :class:`convertToDatetime` - - :class:`stripHTMLTags` - - :class:`upcaseTokens` - - :class:`downcaseTokens` - - Example:: - - pyparsing_common.number.runTests(''' - # any int or real number, returned as the appropriate type - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.fnumber.runTests(''' - # any int or real number, returned as float - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - ''') - - pyparsing_common.hex_integer.runTests(''' - # hex numbers - 100 - FF - ''') - - pyparsing_common.fraction.runTests(''' - # fractions - 1/2 - -3/4 - ''') - - pyparsing_common.mixed_integer.runTests(''' - # mixed fractions - 1 - 1/2 - -3/4 - 1-3/4 - ''') - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(''' - # uuid - 12345678-1234-5678-1234-567812345678 - ''') - - prints:: - - # any int or real number, returned as the appropriate type - 100 - [100] - - -100 - [-100] - - +100 - [100] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # any int or real number, returned as float - 100 - [100.0] - - -100 - [-100.0] - - +100 - [100.0] - - 3.14159 - [3.14159] - - 6.02e23 - [6.02e+23] - - 1e-12 - [1e-12] - - # hex numbers - 100 - [256] - - FF - [255] - - # fractions - 1/2 - [0.5] - - -3/4 - [-0.75] - - # mixed fractions - 1 - [1] - - 1/2 - [0.5] - - -3/4 - [-0.75] - - 1-3/4 - [1.75] - - # uuid - 12345678-1234-5678-1234-567812345678 - [UUID('12345678-1234-5678-1234-567812345678')] - """ - - convertToInteger = tokenMap(int) - """ - Parse action for converting parsed integers to Python int - """ - - convertToFloat = tokenMap(float) - """ - Parse action for converting parsed numbers to Python float - """ - - integer = Word(nums).setName("integer").setParseAction(convertToInteger) - """expression that parses an unsigned integer, returns an int""" - - hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16)) - """expression that parses a hexadecimal integer, returns an int""" - - signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) - """expression that parses an integer with optional leading sign, returns an int""" - - fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") - """fractional expression of an integer divided by an integer, returns a float""" - fraction.addParseAction(lambda t: t[0]/t[-1]) - - mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") - """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" - mixed_integer.addParseAction(sum) - - real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat) - """expression that parses a floating point number and returns a float""" - - sci_real = Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) - """expression that parses a floating point number with optional - scientific notation and returns a float""" - - # streamlining this expression makes the docs nicer-looking - number = (sci_real | real | signed_integer).streamline() - """any numeric expression, returns the corresponding Python type""" - - fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) - """any int or real number, returned as float""" - - identifier = Word(alphas + '_', alphanums + '_').setName("identifier") - """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" - - ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") - "IPv4 address (``0.0.0.0 - 255.255.255.255``)" - - _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") - _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address") - _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) - + "::" - + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) - ).setName("short IPv6 address") - _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) - _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") - ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") - "IPv6 address (long, short, or mixed form)" - - mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") - "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" - - @staticmethod - def convertToDate(fmt="%Y-%m-%d"): - """ - Helper to create a parse action for converting parsed date string to Python datetime.date - - Params - - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) - - Example:: - - date_expr = pyparsing_common.iso8601_date.copy() - date_expr.setParseAction(pyparsing_common.convertToDate()) - print(date_expr.parseString("1999-12-31")) - - prints:: - - [datetime.date(1999, 12, 31)] - """ - def cvt_fn(s, l, t): - try: - return datetime.strptime(t[0], fmt).date() - except ValueError as ve: - raise ParseException(s, l, str(ve)) - return cvt_fn - - @staticmethod - def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): - """Helper to create a parse action for converting parsed - datetime string to Python datetime.datetime - - Params - - - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) - - Example:: - - dt_expr = pyparsing_common.iso8601_datetime.copy() - dt_expr.setParseAction(pyparsing_common.convertToDatetime()) - print(dt_expr.parseString("1999-12-31T23:59:59.999")) - - prints:: - - [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] - """ - def cvt_fn(s, l, t): - try: - return datetime.strptime(t[0], fmt) - except ValueError as ve: - raise ParseException(s, l, str(ve)) - return cvt_fn - - iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") - "ISO8601 date (``yyyy-mm-dd``)" - - iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") - "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" - - uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") - "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" - - _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() - @staticmethod - def stripHTMLTags(s, l, tokens): - """Parse action to remove HTML tags from web page HTML source - - Example:: - - # strip HTML links from normal text - text = 'More info at the
pyparsing wiki page' - td, td_end = makeHTMLTags("TD") - table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end - print(table_text.parseString(text).body) - - Prints:: - - More info at the pyparsing wiki page - """ - return pyparsing_common._html_stripper.transformString(tokens[0]) - - _commasepitem = Combine(OneOrMore(~Literal(",") - + ~LineEnd() - + Word(printables, excludeChars=',') - + Optional(White(" \t")))).streamline().setName("commaItem") - comma_separated_list = delimitedList(Optional(quotedString.copy() - | _commasepitem, default='') - ).setName("comma separated list") - """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" - - upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) - """Parse action to convert tokens to upper case.""" - - downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) - """Parse action to convert tokens to lower case.""" - - -class _lazyclassproperty(object): - def __init__(self, fn): - self.fn = fn - self.__doc__ = fn.__doc__ - self.__name__ = fn.__name__ - - def __get__(self, obj, cls): - if cls is None: - cls = type(obj) - if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', []) - for superclass in cls.__mro__[1:]): - cls._intern = {} - attrname = self.fn.__name__ - if attrname not in cls._intern: - cls._intern[attrname] = self.fn(cls) - return cls._intern[attrname] - - -class unicode_set(object): - """ - A set of Unicode characters, for language-specific strings for - ``alphas``, ``nums``, ``alphanums``, and ``printables``. - A unicode_set is defined by a list of ranges in the Unicode character - set, in a class attribute ``_ranges``, such as:: - - _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] - - A unicode set can also be defined using multiple inheritance of other unicode sets:: - - class CJK(Chinese, Japanese, Korean): - pass - """ - _ranges = [] - - @classmethod - def _get_chars_for_ranges(cls): - ret = [] - for cc in cls.__mro__: - if cc is unicode_set: - break - for rr in cc._ranges: - ret.extend(range(rr[0], rr[-1] + 1)) - return [unichr(c) for c in sorted(set(ret))] - - @_lazyclassproperty - def printables(cls): - "all non-whitespace characters in this range" - return u''.join(filterfalse(unicode.isspace, cls._get_chars_for_ranges())) - - @_lazyclassproperty - def alphas(cls): - "all alphabetic characters in this range" - return u''.join(filter(unicode.isalpha, cls._get_chars_for_ranges())) - - @_lazyclassproperty - def nums(cls): - "all numeric digit characters in this range" - return u''.join(filter(unicode.isdigit, cls._get_chars_for_ranges())) - - @_lazyclassproperty - def alphanums(cls): - "all alphanumeric characters in this range" - return cls.alphas + cls.nums - - -class pyparsing_unicode(unicode_set): - """ - A namespace class for defining common language unicode_sets. - """ - _ranges = [(32, sys.maxunicode)] - - class Latin1(unicode_set): - "Unicode set for Latin-1 Unicode Character Range" - _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] - - class LatinA(unicode_set): - "Unicode set for Latin-A Unicode Character Range" - _ranges = [(0x0100, 0x017f),] - - class LatinB(unicode_set): - "Unicode set for Latin-B Unicode Character Range" - _ranges = [(0x0180, 0x024f),] - - class Greek(unicode_set): - "Unicode set for Greek Unicode Character Ranges" - _ranges = [ - (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d), - (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4), - (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe), - ] - - class Cyrillic(unicode_set): - "Unicode set for Cyrillic Unicode Character Range" - _ranges = [(0x0400, 0x04ff)] - - class Chinese(unicode_set): - "Unicode set for Chinese Unicode Character Range" - _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f),] - - class Japanese(unicode_set): - "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" - _ranges = [] - - class Kanji(unicode_set): - "Unicode set for Kanji Unicode Character Range" - _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f),] - - class Hiragana(unicode_set): - "Unicode set for Hiragana Unicode Character Range" - _ranges = [(0x3040, 0x309f),] - - class Katakana(unicode_set): - "Unicode set for Katakana Unicode Character Range" - _ranges = [(0x30a0, 0x30ff),] - - class Korean(unicode_set): - "Unicode set for Korean Unicode Character Range" - _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f),] - - class CJK(Chinese, Japanese, Korean): - "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range" - pass - - class Thai(unicode_set): - "Unicode set for Thai Unicode Character Range" - _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b),] - - class Arabic(unicode_set): - "Unicode set for Arabic Unicode Character Range" - _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f),] - - class Hebrew(unicode_set): - "Unicode set for Hebrew Unicode Character Range" - _ranges = [(0x0590, 0x05ff),] - - class Devanagari(unicode_set): - "Unicode set for Devanagari Unicode Character Range" - _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)] - -pyparsing_unicode.Japanese._ranges = (pyparsing_unicode.Japanese.Kanji._ranges - + pyparsing_unicode.Japanese.Hiragana._ranges - + pyparsing_unicode.Japanese.Katakana._ranges) - -# define ranges in language character sets -if PY_3: - setattr(pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic) - setattr(pyparsing_unicode, u"中文", pyparsing_unicode.Chinese) - setattr(pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic) - setattr(pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek) - setattr(pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew) - setattr(pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese) - setattr(pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji) - setattr(pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana) - setattr(pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana) - setattr(pyparsing_unicode, u"한국어", pyparsing_unicode.Korean) - setattr(pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai) - setattr(pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari) - - -class pyparsing_test: - """ - namespace class for classes useful in writing unit tests - """ - - class reset_pyparsing_context: - """ - Context manager to be used when writing unit tests that modify pyparsing config values: - - packrat parsing - - default whitespace characters. - - default keyword characters - - literal string auto-conversion class - - __diag__ settings - - Example: - with reset_pyparsing_context(): - # test that literals used to construct a grammar are automatically suppressed - ParserElement.inlineLiteralsUsing(Suppress) - - term = Word(alphas) | Word(nums) - group = Group('(' + term[...] + ')') - - # assert that the '()' characters are not included in the parsed tokens - self.assertParseAndCheckLisst(group, "(abc 123 def)", ['abc', '123', 'def']) - - # after exiting context manager, literals are converted to Literal expressions again - """ - - def __init__(self): - self._save_context = {} - - def save(self): - self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS - self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS - self._save_context[ - "literal_string_class" - ] = ParserElement._literalStringClass - self._save_context["packrat_enabled"] = ParserElement._packratEnabled - self._save_context["packrat_parse"] = ParserElement._parse - self._save_context["__diag__"] = { - name: getattr(__diag__, name) for name in __diag__._all_names - } - self._save_context["__compat__"] = { - "collect_all_And_tokens": __compat__.collect_all_And_tokens - } - return self - - def restore(self): - # reset pyparsing global state - if ( - ParserElement.DEFAULT_WHITE_CHARS - != self._save_context["default_whitespace"] - ): - ParserElement.setDefaultWhitespaceChars( - self._save_context["default_whitespace"] - ) - Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"] - ParserElement.inlineLiteralsUsing( - self._save_context["literal_string_class"] - ) - for name, value in self._save_context["__diag__"].items(): - setattr(__diag__, name, value) - ParserElement._packratEnabled = self._save_context["packrat_enabled"] - ParserElement._parse = self._save_context["packrat_parse"] - __compat__.collect_all_And_tokens = self._save_context["__compat__"] - - def __enter__(self): - return self.save() - - def __exit__(self, *args): - return self.restore() - - class TestParseResultsAsserts: - """ - A mixin class to add parse results assertion methods to normal unittest.TestCase classes. - """ - def assertParseResultsEquals( - self, result, expected_list=None, expected_dict=None, msg=None - ): - """ - Unit test assertion to compare a ParseResults object with an optional expected_list, - and compare any defined results names with an optional expected_dict. - """ - if expected_list is not None: - self.assertEqual(expected_list, result.asList(), msg=msg) - if expected_dict is not None: - self.assertEqual(expected_dict, result.asDict(), msg=msg) - - def assertParseAndCheckList( - self, expr, test_string, expected_list, msg=None, verbose=True - ): - """ - Convenience wrapper assert to test a parser element and input string, and assert that - the resulting ParseResults.asList() is equal to the expected_list. - """ - result = expr.parseString(test_string, parseAll=True) - if verbose: - print(result.dump()) - self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) - - def assertParseAndCheckDict( - self, expr, test_string, expected_dict, msg=None, verbose=True - ): - """ - Convenience wrapper assert to test a parser element and input string, and assert that - the resulting ParseResults.asDict() is equal to the expected_dict. - """ - result = expr.parseString(test_string, parseAll=True) - if verbose: - print(result.dump()) - self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) - - def assertRunTestResults( - self, run_tests_report, expected_parse_results=None, msg=None - ): - """ - Unit test assertion to evaluate output of ParserElement.runTests(). If a list of - list-dict tuples is given as the expected_parse_results argument, then these are zipped - with the report tuples returned by runTests and evaluated using assertParseResultsEquals. - Finally, asserts that the overall runTests() success value is True. - - :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests - :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] - """ - run_test_success, run_test_results = run_tests_report - - if expected_parse_results is not None: - merged = [ - (rpt[0], rpt[1], expected) - for rpt, expected in zip(run_test_results, expected_parse_results) - ] - for test_string, result, expected in merged: - # expected should be a tuple containing a list and/or a dict or an exception, - # and optional failure message string - # an empty tuple will skip any result validation - fail_msg = next( - (exp for exp in expected if isinstance(exp, str)), None - ) - expected_exception = next( - ( - exp - for exp in expected - if isinstance(exp, type) and issubclass(exp, Exception) - ), - None, - ) - if expected_exception is not None: - with self.assertRaises( - expected_exception=expected_exception, msg=fail_msg or msg - ): - if isinstance(result, Exception): - raise result - else: - expected_list = next( - (exp for exp in expected if isinstance(exp, list)), None - ) - expected_dict = next( - (exp for exp in expected if isinstance(exp, dict)), None - ) - if (expected_list, expected_dict) != (None, None): - self.assertParseResultsEquals( - result, - expected_list=expected_list, - expected_dict=expected_dict, - msg=fail_msg or msg, - ) - else: - # warning here maybe? - print("no validation for {!r}".format(test_string)) - - # do this last, in case some specific test results can be reported instead - self.assertTrue( - run_test_success, msg=msg if msg is not None else "failed runTests" - ) - - @contextmanager - def assertRaisesParseException(self, exc_type=ParseException, msg=None): - with self.assertRaises(exc_type, msg=msg): - yield - - -if __name__ == "__main__": - - selectToken = CaselessLiteral("select") - fromToken = CaselessLiteral("from") - - ident = Word(alphas, alphanums + "_$") - - columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - columnNameList = Group(delimitedList(columnName)).setName("columns") - columnSpec = ('*' | columnNameList) - - tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) - tableNameList = Group(delimitedList(tableName)).setName("tables") - - simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") - - # demo runTests method, including embedded comments in test string - simpleSQL.runTests(""" - # '*' as column list and dotted table name - select * from SYS.XYZZY - - # caseless match on "SELECT", and casts back to "select" - SELECT * from XYZZY, ABC - - # list of column names, and mixed case SELECT keyword - Select AA,BB,CC from Sys.dual - - # multiple tables - Select A, B, C from Sys.dual, Table2 - - # invalid SELECT keyword - should fail - Xelect A, B, C from Sys.dual - - # incomplete command - should fail - Select - - # invalid column name - should fail - Select ^^^ frox Sys.dual - - """) - - pyparsing_common.number.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - # any int or real number, returned as float - pyparsing_common.fnumber.runTests(""" - 100 - -100 - +100 - 3.14159 - 6.02e23 - 1e-12 - """) - - pyparsing_common.hex_integer.runTests(""" - 100 - FF - """) - - import uuid - pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) - pyparsing_common.uuid.runTests(""" - 12345678-1234-5678-1234-567812345678 - """) diff --git a/pyproject.toml b/pyproject.toml index ac4b75d..1d0cb3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,6 @@ exclude = ''' [tool.vendy] target = 'pip_api' packages = [ - "packaging==21.0", - "pyparsing==2.4.7", + "packaging==23.2", + "packaging_legacy==23.0.post0", ] diff --git a/setup.py b/setup.py index 022dd7b..e838556 100644 --- a/setup.py +++ b/setup.py @@ -27,5 +27,5 @@ python_requires=">=3.7", url="http://github.com/di/pip-api", summary="An unofficial, importable pip API", - version="0.0.30", + version="0.0.31", )