Skip to content

Commit 9af1801

Browse files
author
Sven Siegmund
committed
Parsing can start
1 parent 4f5f8f9 commit 9af1801

File tree

17 files changed

+833
-0
lines changed

17 files changed

+833
-0
lines changed

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ __pycache__/
33
*.py[cod]
44
*$py.class
55

6+
# Vim files
7+
*~
8+
*.swp
9+
*.swo
10+
11+
# RTF
12+
*.rtf
13+
614
# C extensions
715
*.so
816

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,14 @@
11
# pyrtfparse
2+
23
RTF Parser
4+
5+
## Call for collaboration
6+
7+
This RTF parser needs your collaboration. Refer to the RTF Specification and create new parsable objects.
8+
9+
## RTF Specification Links
10+
11+
If you find a working official Microsoft link to the RTF specification and add it here, you'll be remembered fondly.
12+
13+
* [Swissmains Link to RTF Spec 1.9.1](https://manuals.swissmains.com/pages/viewpage.action?pageId=1376332&preview=%2F1376332%2F10620104%2FWord2007RTFSpec9.pdf)
14+
* [Webarchive Link to RTF Spec 1.9.1](https://web.archive.org/web/20190708132914/http://www.kleinlercher.at/tools/Windows_Protocols/Word2007RTFSpec9.pdf)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
argcomplete

setup.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/usr/bin/env python
2+
3+
from setuptools import setup
4+
from setuptools import find_packages
5+
import pathlib
6+
import re
7+
8+
9+
# Project layout constants used throughout this setup script
project_name = "pyrtfparse"
package_dir = "src"
this_dir = pathlib.Path(__file__).parent.absolute()
path_to_init_file = this_dir.joinpath(package_dir, project_name, "__init__.py")


# The README is used verbatim as the long description
long_description = this_dir.joinpath("README.md").read_text(encoding="utf-8")


# One entry per line of requirements.txt, with surrounding whitespace removed
with open(this_dir.joinpath("requirements.txt"), encoding="utf-8") as reqs:
    requirements = list(map(str.strip, reqs))
21+
22+
23+
def get_property(property: str, path_to_init_file: pathlib.Path) -> "str | None":
    """
    Reads a property from a Python source file such as the project's __init__.py
    e.g. get_property("__version__", path) --> "1.2.3"

    Only simple assignments of a quoted string are recognised, i.e. a line
    of the form `<property> = "<value>"` (single or double quotes).

    :param property: exact name of the variable to look up
    :param path_to_init_file: file to search
    :return: the quoted value, or None when the file does not exist or
        contains no such assignment
    """
    # The name is escaped and anchored to the start of a line so that
    # "version" neither acts as a regex nor matches inside "my_version".
    regex = re.compile(
        r"^[ \t]*{}\s*=\s*[\"'](?P<value>[^\"']*)[\"']".format(re.escape(property)),
        re.MULTILINE,
    )
    try:
        with open(path_to_init_file, encoding="utf-8") as initfh:
            match = regex.search(initfh.read())
    except FileNotFoundError:
        return None
    return match.group("value") if match else None
38+
39+
40+
# Package metadata. Version and author details are read from the package
# sources with get_property so that they are defined in one place only.
setup(
    name=project_name,
    version=get_property("version", path_to_init_file.parent / "version.py"),
    description="RTF parser",
    long_description=long_description,
    # The long description is the content of README.md, i.e. Markdown
    long_description_content_type="text/markdown",
    author=get_property("__author__", path_to_init_file),
    author_email=get_property("__author_email__", path_to_init_file),
    url="https://github.com/Nagidal/pyrtfparse",
    # Every entry needs its trailing comma: adjacent string literals are
    # otherwise concatenated into a single, invalid classifier.
    classifiers=[
        "Development Status :: 2 - Pre-Alpha",
        # "Development Status :: 3 - Alpha",
        # "Development Status :: 4 - Beta",
        # "Development Status :: 5 - Production/Stable",
        # "Intended Audience :: End Users/Desktop",
        "Intended Audience :: Developers",
        "Intended Audience :: System Administrators",
        "Environment :: Console",
        "Topic :: Software Development :: Testing",
        "Topic :: Utilities",
        "License :: Free To Use But Restricted",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.9",
        "Operating System :: OS Independent",
        "Operating System :: Microsoft :: Windows",
        "Operating System :: POSIX :: Linux",
        "Operating System :: MacOS :: MacOS X",
    ],
    keywords="parsing rtf",
    package_dir={"": package_dir},
    packages=find_packages(where=package_dir),
    package_data={
        project_name: [],
    },
    python_requires=">=3.9",
    install_requires=requirements,
    entry_points={
        "console_scripts": [
            f"{project_name} = {project_name}.__main__:{project_name}",
        ],
    },
    platforms=["any"],
)

src/pyrtfparse/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/usr/bin/env python
2+
3+
4+
"""
5+
Parser of Microsoft RTF Format
6+
"""
7+
8+
9+
from pyrtfparse import version
10+
11+
12+
# Package metadata. setup.py reads __author__ and __author_email__ from this file by regex.
__author__ = "Sven Siegmund"
13+
# NOTE(review): this value looks like an e-mail obfuscation placeholder — confirm the real address.
__author_email__ = "[email protected]"
14+
__date__ = "2020-12-21"
15+
# Re-exported from the `version` module imported above.
__version__ = version.version
16+
17+
18+

src/pyrtfparse/__main__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/usr/bin/env python
2+
3+
4+
import sys
5+
from pyrtfparse import version
6+
from pyrtfparse import entry
7+
8+
9+
def pyrtfparse():
    """
    Console entry point: starts the command line interface with the
    package version and exits the interpreter with whatever it returns
    """
    exit_status = entry.cli_start(version.version)
    sys.exit(exit_status)
11+
12+
13+
# Run the console entry point when this module is executed as a script
if __name__ == "__main__":
14+
pyrtfparse()

src/pyrtfparse/config_loader.py

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
#!/usr/bin/env python
2+
3+
4+
import pathlib
5+
import configparser
6+
import logging
7+
import sys
8+
import traceback
9+
import re
10+
import dataclasses
11+
import urllib.parse
12+
from collections import OrderedDict
13+
# Own modules
14+
from pyrtfparse import errors
15+
from pyrtfparse import utils
16+
from pyrtfparse import menu
17+
18+
19+
# setup logging
20+
logger = logging.getLogger(__name__)
21+
22+
23+
@dataclasses.dataclass
class Preconfigured_Path:
    """
    A named path together with a human-readable comment describing it

    `path` is an init-only argument: whatever is passed in is converted
    to a pathlib.Path and stored as the `path` attribute of the instance
    """
    internal_name: str
    path: dataclasses.InitVar[pathlib.Path]
    comment: str

    def __post_init__(self, raw_path) -> None:
        # The init-only value arrives positionally; normalise it to a Path
        self.path = pathlib.Path(raw_path)
30+
31+
32+
class Config():
    """
    Holds all configuration data readily available as attributes
    """

    def __init__(self, cfg_path: pathlib.Path, autoconfig: bool) -> None:
        """
        Reads (or creates) the config file and verifies the configured paths

        :param cfg_path: location of the configuration file
        :param autoconfig: when True the wizard creates missing paths
            without asking the user
        """
        self.error_regex = re.compile(r"""config_parser\.get.*\(["'](?P<section>\w+)["'], *["'](?P<variable>\w+)["']\)""")
        self.path_to_config_file = cfg_path
        self.path_to_home = utils.provide_dir(self.path_to_config_file.parent)
        self.path_to_pyrtfparse_home = pathlib.Path.home() / utils.home_dir_name
        self._subdir_dir = Preconfigured_Path(
            internal_name="subdir_dir",
            path=self.path_to_pyrtfparse_home / "subdir",
            comment="some subdir",
        )
        self._wizard_has_run = False
        self.autoconfig = autoconfig
        self.read_config_file()
        # NOTE(review): the check runs against the preconfigured defaults,
        # not against the values parsed from the file — confirm this is intended
        self.check_paths = (self._subdir_dir,)
        self.integrity_check()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Deleting config during development. Comment after release.
        # self.delete_config_file()
        pass

    def reset_parser(self) -> None:
        """
        Replaces the parser with a fresh, empty, case-preserving one
        """
        self.config_parser = configparser.ConfigParser(allow_no_value=True)
        # Keep option names exactly as written instead of lower-casing them
        self.config_parser.optionxform = str

    def read_config_file(self) -> None:
        """
        Reads current configuration file or creates a new one with a default configuration
        """
        self.reset_parser()
        try:
            # Same encoding as used for writing; the handle is closed again
            with open(self.path_to_config_file, encoding="utf-8") as configfh:
                self.config_parser.read_file(configfh)
        except FileNotFoundError:
            logger.info("Config file missing, creating new default config file")
            self.create_config_file()
            return
        self.parse()
        logger.debug(f"{self.path_to_config_file.name} read")

    def integrity_check(self) -> None:
        """
        Starts the wizard for the first path in `check_paths` that does not exist
        """
        for preconf_path in self.check_paths:
            path_to_check = preconf_path.path
            # Explicit test instead of `assert`, which is removed under -O
            if path_to_check.exists():
                continue
            logger.debug("Path not found, starting wizard")
            self.wizard(errors.WrongConfiguration(f"{self.path_to_config_file.name}: '{str(path_to_check)}', path does not exist!", preconf_path), autoconfig=self.autoconfig)
            # The wizard re-runs the check itself; one failure is handled per call
            return

    def create_config_file(self) -> None:
        """
        Creates the default config file
        """
        self.reset_parser()
        self.config_parser.add_section("Paths")
        # Value-less options starting with '#' end up as comment lines in the file
        self.config_parser.set("Paths", "# You can write paths in Windows format or Linux/POSIX format.")
        self.config_parser.set("Paths", "# A trailing '/' at the end of the final directory in a POSIX path")
        self.config_parser.set("Paths", "# or a '\\' at the end of the final directory of a Windows path")
        self.config_parser.set("Paths", "# does not interfere with the path parser.")
        self.config_parser.set("Paths", "")
        for preconf_path in (
                self._subdir_dir,
                ):
            # Slicing instead of indexing so that an empty comment does not fail
            comment = preconf_path.comment
            self.config_parser.set("Paths", f"# {comment[:1].capitalize()}{comment[1:]}")
            self.config_parser.set("Paths", f"{preconf_path.internal_name}", f"{preconf_path.path}")
        with open(self.path_to_config_file, mode="w", encoding="utf-8") as configfh:
            self.config_parser.write(configfh)
        self.read_config_file()

    def delete_config_file(self) -> None:
        """
        Serves debugging purposes. Deletes the config file.
        """
        try:
            self.path_to_config_file.unlink()
            logger.info(f"{self.path_to_config_file.name} deleted")
        except FileNotFoundError:
            logger.error(f"Could not delete {self.path_to_config_file.name} because it does not exist")

    def getpath(self, section: str, value: str) -> pathlib.Path:
        """
        Returns value from config file as pathlib.Path object
        """
        return pathlib.Path(self.config_parser.get(section, value))

    def geturl(self, section: str, raw_url: str) -> urllib.parse.ParseResult:
        """
        Parses a URL and returns urllib ParseResult
        """
        return urllib.parse.urlparse(self.config_parser.get(section, raw_url))

    def parse(self) -> None:
        """
        Parses the configuration files into usable attributes

        Raises errors.WrongConfiguration when an option or section is
        missing or anything else goes wrong while reading the values.
        """
        try:
            self.subdir_dir = self.getpath("Paths", "subdir_dir")
        except ValueError:
            # Try to recover section, variable and offending value from the traceback text
            lines = traceback.format_exc().splitlines()
            section, variable, value = False, False, False
            for line in lines:
                if "config_parser" in line:
                    match = self.error_regex.search(line)
                    if match:
                        section = match.group("section")
                        variable = match.group("variable")
                        value = lines[-1].split()[-1]
            if section and variable and value:
                message = f"{self.path_to_config_file.name}: '{variable}' in section '{section}' has an unacceptable value of {value}"
                raise errors.WrongConfiguration(message, None)
            else:
                raise
        except (configparser.NoOptionError, configparser.NoSectionError) as err:
            raise errors.WrongConfiguration(f"{self.path_to_config_file.name}: {err.message}", err) from err
        except Exception as err:
            raise errors.WrongConfiguration(f"There is something wrong with {self.path_to_config_file.name}. Please check it carefully or delete it to have it recreated.", err) from err

    def configure_paths(self, preconf_path: Preconfigured_Path, manually: bool) -> None:
        """
        Creates the directory for `preconf_path`, rewrites the config file and re-checks it

        :param preconf_path: the path entry that needs to be reconfigured
        :param manually: when True the user is asked for the path until one
            can be provided; otherwise the preconfigured path is used
        """
        logger.debug(f"{preconf_path.internal_name} needs to be reconfigured")
        if manually:
            logger.debug("Configuring paths manually")
            while True:
                try:
                    path_to_create = utils.input_path(f"Input a {preconf_path.comment}: ")
                    created_path = utils.provide_dir(path_to_create)
                    break
                except OSError as err:
                    # Report the problem and ask again
                    logger.error(err)
        else:
            logger.debug("Configuring paths automatically")
            created_path = utils.provide_dir(preconf_path.path)
        preconf_path.path = created_path
        self.create_config_file()
        self.integrity_check()

    def wizard(self, error: errors.WrongConfiguration, autoconfig: bool) -> None:
        """
        Configuration wizard guides the user through the initial setup process

        :param error: the configuration problem to resolve; only
            "path does not exist" problems are handled so far
        :param autoconfig: when True the missing path is created without asking
        """
        wiz_menu = menu.Text_Menu(menu_name="Configuration Wizard", heading=r"""
____ ____ __ _ ____ _ ____ _ _ ____ ____ ___ _ ____ __ _
|___ [__] | \| |--- | |__, |__| |--< |--| | | [__] | \|
_ _ _ ___ ____ ____ ___
|/\| | /__ |--| |--< |__>
""")
        # The heading is shown only once, however often the wizard is re-entered
        if not self._wizard_has_run:
            wiz_menu.show_heading()
            self._wizard_has_run = True
        if "path does not exist" in error.message:
            reason = (f"{error.payload.internal_name} ({error.payload.path}) does not exist!")
            options = OrderedDict((
                ("A", "Automatically configure this and all remaining claws settings"),
                ("C", "Create this path automatically"),
                ("M", "Manually input correct path to use or to create"),
                ("Q", f"Quit and edit `{error.payload.internal_name}` in {self.path_to_config_file.name}"),
                ))
            choice = "C" if autoconfig else None
            if not choice:
                wiz_menu.show_reason(reason)
                choice = wiz_menu.choose_from(options)
            if choice == "A":
                # Takes effect through the integrity check below, which
                # re-enters the wizard with autoconfig switched on
                self.autoconfig = True
            logger.debug(f"Your choice: {choice}")
            if choice == "C":
                self.configure_paths(error.payload, manually=False)
            elif choice == "M":
                self.configure_paths(error.payload, manually=True)
            elif choice == "Q":
                raise errors.WrongConfiguration(f"Who needs a wizard, when you can edit `{self.path_to_config_file.name}` yourself, right?", None)
            self.integrity_check()
        else:
            # `error` is the parameter; the former `err` was an undefined name
            raise NotImplementedError(f"Starting configuration wizard with {error.message} is not implemented yet")
205+
206+
207+
# Running this module directly does nothing
if __name__ == "__main__":
208+
pass

src/pyrtfparse/cwtypes.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env python
2+
3+
4+
"""
5+
Control Word Types
6+
"""
7+
8+
9+
from pyrtfparse import re_patterns
10+
11+
12+
class Cwtype:
    """
    Base class of the control word types
    """
    # Appended by subclasses to a pattern before it is matched
    default_delimiter: str = "\r\n"
14+
15+
16+
class Flag(Cwtype):
    """
    Flag control word type
    """
    native_pattern = re_patterns.control_word

    def __init__(self, pattern: str) -> None:
        """
        Appends the default delimiter to `pattern`, encodes the result as
        ASCII and stores the outcome of matching it in `self.something`
        """
        delimited = pattern + self.default_delimiter
        encoded = delimited.encode("ascii")
        # NOTE(review): assumes `native_pattern.pattern` offers a `match`
        # method accepting bytes — confirm against re_patterns
        self.something = self.native_pattern.pattern.match(encoded)
20+
21+
22+
# Running this module directly does nothing
if __name__ == "__main__":
23+
pass

0 commit comments

Comments
 (0)