Skip to content

Commit 376c2f9

Browse files
authored
Merge pull request #6
* Support for saving summary in multiple formats.
* Support for parsing reported NAV for each scheme.
1 parent 0380194 commit 376c2f9

File tree

9 files changed

+162
-66
lines changed

9 files changed

+162
-66
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ data = casparser.read_cas_pdf('/path/to/cas/pdf/file.pdf', 'password')
6363
"close": "number",
6464
"valuation": {
6565
"date": "date",
66+
"nav": "number",
6667
"value": "number"
6768
},
6869
"transactions": [

assets/demo.jpg

-20.1 KB
Loading

casparser/cli.py

Lines changed: 138 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,108 +1,178 @@
11
from decimal import Decimal
22
import json
3+
import os
34
import re
5+
import shutil
46
import sys
7+
import textwrap
58

69
import click
7-
import texttable
10+
11+
# noinspection PyProtectedMember
12+
from tabulate import tabulate, _table_formats
13+
814

915
from .__version__ import __version__
1016

1117
from . import read_cas_pdf
1218
from .encoder import CASDataEncoder
1319
from .exceptions import ParserException
20+
from .parsers.utils import isclose
1421

1522
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
1623

1724

18-
# noinspection PyUnusedLocal
19-
def validate_output_filename(ctx, param, filename: str):
20-
"""Validate whether output filename ends with json."""
21-
if filename is None or filename.lower().endswith(".json"):
22-
return filename
23-
raise click.BadParameter("Output filename should end with .json")
24-
25-
26-
def print_summary(data):
25+
def print_summary(data, tablefmt="fancy_grid", output_filename=None, include_zero_folios=False):
2726
"""Print summary of parsed data."""
2827
count = 0
2928
err = 0
30-
click.echo("\n")
31-
click.echo(
32-
f"{'Statement Period':>40s}: "
33-
f"{click.style(data['statement_period']['from'], fg='green', bold=True)}"
34-
f" To {click.style(data['statement_period']['to'], fg='green', bold=True)}"
35-
)
36-
click.echo(f"{'File Type':>40s}: {click.style(data['file_type'], bold=True)}")
37-
for key, value in data["investor_info"].items():
38-
fmt_value = " ".join([x.strip() for x in value.splitlines()])
39-
fmt_value = re.sub(r"\s+", " ", fmt_value)
40-
if len(fmt_value) > 40:
41-
fmt_value = fmt_value[:37] + "..."
42-
click.echo(f"{key:>40s}: {fmt_value}")
43-
click.echo("")
44-
table = texttable.Texttable(max_width=120)
45-
header = [
29+
30+
if output_filename:
31+
fmt = "fancy_grid"
32+
else:
33+
fmt = tablefmt
34+
35+
print_extra_info = fmt in ("simple", "plain", "fancy_grid", "grid", "pretty")
36+
if print_extra_info:
37+
click.echo("\n")
38+
click.echo(
39+
f"{'Statement Period':>40s}: "
40+
f"{click.style(data['statement_period']['from'], fg='green', bold=True)}"
41+
f" To {click.style(data['statement_period']['to'], fg='green', bold=True)}"
42+
)
43+
click.echo(f"{'File Type':>40s}: {click.style(data['file_type'], bold=True)}")
44+
for key, value in data["investor_info"].items():
45+
fmt_value = " ".join([x.strip() for x in value.splitlines()])
46+
fmt_value = re.sub(r"\s+", " ", fmt_value)
47+
if len(fmt_value) > 40:
48+
fmt_value = fmt_value[:37] + "..."
49+
click.echo(f"{key:>40s}: {fmt_value}")
50+
click.echo("")
51+
rows = []
52+
console_rows = []
53+
54+
console_header = [
4655
"Scheme",
4756
"Open",
48-
"Close\nReported",
49-
"Close\nCalculated",
57+
"Close\n\nReported\nvs.\nCalculated",
5058
f"Value\n({data['statement_period']['to']})",
59+
"Txns",
60+
"",
61+
]
62+
header = [
63+
"Scheme",
64+
"Open",
65+
"Close Reported",
66+
"Close Calculated",
67+
f"NAV ({data['statement_period']['to']})",
68+
f"Value ({data['statement_period']['to']})",
5169
"Transactions",
5270
"Status",
5371
]
54-
table.add_row(header)
55-
table.set_cols_align(["l"] + ["r"] * (len(header) - 2) + ["c"])
56-
table.set_cols_valign(["m"] * len(header))
72+
col_align = ["left"] + ["right"] * (len(header) - 2) + ["center"]
73+
console_col_align = ["left"] + ["right"] * (len(console_header) - 2) + ["center"]
74+
5775
current_amc = None
5876
value = Decimal(0)
77+
columns, _ = shutil.get_terminal_size()
78+
scheme_col_width = columns - 66
79+
80+
folio_header_added = False
5981
for folio in data["folios"]:
6082
if current_amc != folio.get("amc", ""):
83+
folio_header_added = False
6184
current_amc = folio["amc"]
62-
table.add_row([current_amc] + [""] * 6)
6385
for scheme in folio["schemes"]:
86+
87+
if scheme["close"] < 1e-3 and not include_zero_folios:
88+
continue
89+
6490
calc_close = scheme["open"] + sum([x["units"] for x in scheme["transactions"]])
6591
valuation = scheme["valuation"]
66-
if calc_close != scheme["close"]:
92+
93+
# Check if calculated close (i.e. open + units from all transactions) is the same as
94+
# reported close and also the scheme valuation = nav * calculated close.
95+
if calc_close != scheme["close"] or not isclose(
96+
valuation["nav"] * calc_close, valuation["value"], tol=2
97+
):
6798
err += 1
6899
status = "❗️"
69100
else:
70101
status = "️✅"
71-
scheme_name = f"{scheme['scheme']}\nFolio: {folio['folio']}"
102+
wrapped_name = textwrap.fill(scheme["scheme"], width=scheme_col_width)
103+
folio_number = re.sub(r"\s+", "", folio["folio"])
104+
folio_string = textwrap.fill(f"Folio: {folio_number}", width=scheme_col_width)
105+
scheme_name = f"{wrapped_name}\n{folio_string}"
72106
value += valuation["value"]
73-
table.add_row(
107+
108+
if not folio_header_added:
109+
rows.append(
110+
[textwrap.fill(current_amc, width=scheme_col_width)] + [""] * (len(header) - 1)
111+
)
112+
console_rows.append(
113+
[textwrap.fill(current_amc, width=scheme_col_width)]
114+
+ [""] * (len(console_header) - 1)
115+
)
116+
folio_header_added = True
117+
118+
console_rows.append(
119+
[
120+
scheme_name,
121+
scheme["open"],
122+
f"{scheme['close']}\n/\n{calc_close}",
123+
f"₹{valuation['value']:,.2f}\n@\n{valuation['nav']:,.2f}",
124+
len(scheme["transactions"]),
125+
status,
126+
]
127+
)
128+
rows.append(
74129
[
75130
scheme_name,
76131
scheme["open"],
77132
scheme["close"],
78133
calc_close,
79-
f"₹{valuation['value']:,.2f}",
134+
valuation["nav"],
135+
valuation["value"],
80136
len(scheme["transactions"]),
81137
status,
82138
]
83139
)
84140
count += 1
85-
click.echo(table.draw())
86-
click.echo(
87-
"Portfolio Valuation : "
88-
+ click.style(f"₹{value:,.2f}", fg="green", bold=True)
89-
+ f" [As of {data['statement_period']['to']}]"
90-
)
91-
click.secho("Summary", bold=True)
92-
click.echo("Total : " + click.style(f"{count:4d}", fg="white", bold=True) + " schemes")
93-
click.echo("Matched : " + click.style(f"{count - err:4d}", fg="green", bold=True) + " schemes")
94-
click.echo("Error : " + click.style(f"{err:4d}", fg="red", bold=True) + " schemes")
141+
if print_extra_info:
142+
click.echo(tabulate(console_rows, console_header, tablefmt=fmt, colalign=console_col_align))
143+
click.echo(
144+
"Portfolio Valuation : "
145+
+ click.style(f"₹{value:,.2f}", fg="green", bold=True)
146+
+ f" [As of {data['statement_period']['to']}]"
147+
)
148+
click.secho("Summary", bold=True)
149+
click.echo("Total : " + click.style(f"{count:4d}", fg="white", bold=True) + " schemes")
150+
click.echo(
151+
"Matched : " + click.style(f"{count - err:4d}", fg="green", bold=True) + " schemes"
152+
)
153+
click.echo("Error : " + click.style(f"{err:4d}", fg="red", bold=True) + " schemes")
154+
else:
155+
click.echo(tabulate(rows, header, tablefmt=fmt, colalign=col_align))
156+
157+
if output_filename:
158+
with open(output_filename, "w") as f:
159+
f.write(tabulate(rows, header, tablefmt=tablefmt, colalign=col_align))
160+
click.echo("File saved : " + click.style(output_filename, bold=True))
95161

96162

97163
@click.command(name="casparser", context_settings=CONTEXT_SETTINGS)
98164
@click.option(
99165
"-o",
100166
"--output",
101-
help="Output file path (json)",
102-
callback=validate_output_filename,
103-
type=click.Path(exists=False, dir_okay=False, writable=True),
167+
help="Output file path",
168+
type=click.Path(dir_okay=False, writable=True),
169+
)
170+
@click.option(
171+
"-s",
172+
"--summary",
173+
type=click.Choice(_table_formats.keys()),
174+
help="Print Summary of transactions parsed.",
104175
)
105-
@click.option("-s", "--summary", is_flag=True, help="Print Summary of transactions parsed.")
106176
@click.option(
107177
"-p",
108178
"password",
@@ -112,24 +182,36 @@ def print_summary(data):
112182
confirmation_prompt=False,
113183
help="CAS password",
114184
)
185+
@click.option(
186+
"-a",
187+
"--include-all",
188+
is_flag=True,
189+
help="Include schemes with zero valuation in the summary output"
190+
)
115191
@click.option(
116192
"--force-pdfminer", is_flag=True, help="Force PDFMiner parser even if MuPDF is detected"
117193
)
118194
@click.version_option(__version__, prog_name="casparser-cli")
119195
@click.argument("filename", type=click.Path(exists=True), metavar="CAS_PDF_FILE")
120-
def cli(output, summary, password, force_pdfminer, filename):
196+
def cli(output, summary, password, include_all, force_pdfminer, filename):
121197
"""CLI function."""
122-
if output is None and not summary:
123-
click.echo("No output file provided. Printing summary")
124-
summary = True
198+
output_ext = None
199+
if output is not None:
200+
output_ext = os.path.splitext(output)[-1].lower()
201+
202+
if not (summary or output_ext == ".json"):
203+
summary = "fancy_grid"
204+
125205
try:
126206
data = read_cas_pdf(filename, password, force_pdfminer=force_pdfminer)
127207
except ParserException as exc:
128208
click.echo("Error parsing pdf file :: " + click.style(str(exc), bold=True, fg="red"))
129209
sys.exit(1)
130210
if summary:
131-
print_summary(data)
132-
if output is not None:
211+
print_summary(
212+
data, tablefmt=summary, include_zero_folios=include_all, output_filename=None if output_ext == ".json" else output
213+
)
214+
if output_ext == ".json":
133215
with open(output, "w") as fp:
134216
json.dump(data, fp, cls=CASDataEncoder, indent=2)
135217
click.echo("File saved : " + click.style(output, bold=True))

casparser/process.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from .exceptions import HeaderParseError, CASParseError
77
from .regex import FOLIO_RE, HEADER_RE, SCHEME_RE
8-
from .regex import CLOSE_UNITS_RE, OPEN_UNITS_RE, VALUATION_RE, TRANSACTION_RE
8+
from .regex import CLOSE_UNITS_RE, NAV_RE, OPEN_UNITS_RE, VALUATION_RE, TRANSACTION_RE
99

1010

1111
def parse_header(text):
@@ -64,7 +64,7 @@ def process_cas_text(text):
6464
"rta": m.group(4).strip(),
6565
"open": Decimal(0.0),
6666
"close": Decimal(0.0),
67-
"valuation": {"date": None, "value": 0},
67+
"valuation": {"date": None, "value": 0, "nav": 0},
6868
"transactions": [],
6969
}
7070
if not curr_scheme_data:
@@ -79,6 +79,11 @@ def process_cas_text(text):
7979
date=date_parser.parse(m.group(1)).date(),
8080
value=Decimal(m.group(2).replace(",", "_")),
8181
)
82+
if m := re.search(NAV_RE, line, re.I):
83+
curr_scheme_data["valuation"].update(
84+
date=date_parser.parse(m.group(1)).date(),
85+
nav=Decimal(m.group(2).replace(",", "_")),
86+
)
8287
continue
8388
if m := re.search(TRANSACTION_RE, line, re.DOTALL):
8489
date = date_parser.parse(m.group(1)).date()

casparser/regex.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
OPEN_UNITS_RE = r"Opening\s+Unit\s+Balance.+?([\d,.]+)"
1010
CLOSE_UNITS_RE = r"Closing\s+Unit\s+Balance.+?([\d,.]+)"
1111
VALUATION_RE = r"Valuation\s+on\s+(\d{2}-[A-Za-z]{3}-\d{4})\s*:\s*INR\s*([\d,.]+)"
12+
NAV_RE = r"NAV\s+on\s+(\d{2}-[A-Za-z]{3}-\d{4})\s*:\s*INR\s*([\d,.]+)"
1213

1314
TRANSACTION_RE = (
1415
r"(\d{2}-[A-Za-z]{3}-\d{4})\t\t([^\t]+?)\t\t([(\d,.]+)\)*\t\t"

casparser/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class SchemeValuationType(TypedDict):
3131
"""Scheme valuation as of a given date."""
3232

3333
date: Union[date, str]
34+
nav: Union[Decimal, float]
3435
value: Union[Decimal, float]
3536

3637

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ click==7.1.2
22
colorama==0.4.4
33
pdfminer.six==20201018
44
python-dateutil==2.8.1
5-
texttable==1.6.3
5+
tabulate==0.8.7
66
wcwidth==0.2.5

setup.cfg

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ install_requires =
2424
click>=7.1.2
2525
colorama>=0.4.4
2626
pdfminer.six>=20201018
27-
python-dateutil==2.8.1
28-
texttable>=1.6.3
29-
wcwidth==0.2.5
27+
python-dateutil>=2.8.1
28+
tabulate>=0.8.7
29+
wcwidth>=0.2.5
3030

3131
[options.entry_points]
3232
console_scripts =

tests/test_mupdf.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,24 @@ def test_cli(self, tmpdir):
2626
assert result.exit_code == 0
2727
assert "File saved" in result.output
2828

29-
fpath = tmpdir.join("output.txt")
29+
fpath = tmpdir.join("output.html")
3030
result = runner.invoke(
31-
cli, [self.cams_file_name, "-p", self.cams_password, "-o", fpath.strpath]
31+
cli, [self.cams_file_name, "-p", self.cams_password, "-o", fpath.strpath, "-s", "html"]
32+
)
33+
assert result.exit_code != 1
34+
assert "File saved" in result.output
35+
36+
result = runner.invoke(
37+
cli, [self.cams_file_name, "-p", self.cams_password, "-s", "html"]
3238
)
3339
assert result.exit_code != 1
34-
assert "Output filename should end" in result.output
40+
assert "<table>\n<thead>" in result.output
3541

3642
result = runner.invoke(cli, [self.kfintech_file_name, "-p", self.cams_password])
3743
assert result.exit_code != 0
3844
assert "Incorrect PDF password!" in result.output
3945

40-
result = runner.invoke(cli, [self.bad_file_name, "-p", ""])
46+
result = runner.invoke(cli, [self.bad_file_name, "-p", "", "-a"])
4147
assert result.exit_code == 0
4248
assert re.search(r"Error\s+:\s+1\s+schemes", result.output) is not None
4349

0 commit comments

Comments
 (0)