Skip to content

Commit 0663b54

Browse files
committed
fix crash while parsing multi-line transactions
1 parent 2800891 commit 0663b54

File tree

3 files changed

+21
-20
lines changed

3 files changed

+21
-20
lines changed

casparser/process/cas_detailed.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
from ..exceptions import HeaderParseError, CASParseError
99
from .regex import DETAILED_DATE_RE, FOLIO_RE, SCHEME_RE, REGISTRAR_RE
1010
from .regex import CLOSE_UNITS_RE, NAV_RE, OPEN_UNITS_RE, VALUATION_RE
11-
from .regex import DESCRIPTION_TAIL_RE, DIVIDEND_RE, TRANSACTION_RE
12-
from ..types import FolioType
11+
from .regex import DIVIDEND_RE, TRANSACTION_RE1, TRANSACTION_RE2
12+
from ..types import FolioType, SchemeType
1313
from .utils import isin_search
1414

1515

@@ -31,7 +31,8 @@ def get_transaction_type(
3131
dividend_rate = None
3232
description = description.lower()
3333
if div_match := re.search(DIVIDEND_RE, description, re.I | re.DOTALL):
34-
reinvest_flag, dividend_rate = div_match.groups()
34+
reinvest_flag, dividend_str = div_match.groups()
35+
dividend_rate = Decimal(dividend_str)
3536
txn_type = (
3637
TransactionType.DIVIDEND_REINVEST if reinvest_flag else TransactionType.DIVIDEND_PAYOUT
3738
)
@@ -73,6 +74,12 @@ def get_transaction_type(
7374
return txn_type, dividend_rate
7475

7576

77+
def parse_transaction(line):
    """
    Match a statement line against the known transaction patterns.

    ``TRANSACTION_RE1`` is tried first, then ``TRANSACTION_RE2``; both are
    searched with ``re.DOTALL | re.MULTILINE | re.I``.

    :param line: Text to search.
    :return: The ``re.Match`` from the first pattern that matches, or
        ``None`` when neither pattern matches.
    """
    search_flags = re.DOTALL | re.MULTILINE | re.I
    attempts = (
        re.search(pattern, line, search_flags)
        for pattern in (TRANSACTION_RE1, TRANSACTION_RE2)
    )
    # The generator is lazy, so the second pattern is only tried when the first fails.
    return next((hit for hit in attempts if hit), None)
81+
82+
7683
def process_detailed_text(text):
7784
"""
7885
Process the text version of a CAS pdf and return the detailed summary.
@@ -93,11 +100,6 @@ def process_detailed_text(text):
93100
# "Registrar" column to the previous line
94101
if re.search(REGISTRAR_RE, line):
95102
line = "\t\t".join([lines[idx + 1], line])
96-
if m := re.search(DESCRIPTION_TAIL_RE, line, re.I | re.DOTALL):
97-
description_tail = m.group(1).rstrip()
98-
line = line.replace(description_tail, "")
99-
else:
100-
description_tail = ""
101103
if amc_match := re.search(r"^(.+?)\s+(MF|Mutual\s+Fund)$", line, re.I | re.DOTALL):
102104
current_amc = amc_match.group(0)
103105
elif m := re.search(FOLIO_RE, line, re.I | re.DOTALL):
@@ -128,7 +130,7 @@ def process_detailed_text(text):
128130
rta = m.group(4).strip()
129131
rta_code = m.group(1).strip()
130132
isin, amfi = isin_search(scheme, rta, rta_code)
131-
curr_scheme_data = {
133+
curr_scheme_data: SchemeType = {
132134
"scheme": scheme,
133135
"advisor": advisor,
134136
"rta_code": rta_code,
@@ -138,7 +140,7 @@ def process_detailed_text(text):
138140
"open": Decimal(0.0),
139141
"close": Decimal(0.0),
140142
"close_calculated": Decimal(0.0),
141-
"valuation": {"date": None, "value": 0, "nav": 0},
143+
"valuation": {"date": None, "value": Decimal(0.0), "nav": Decimal(0.0)},
142144
"transactions": [],
143145
}
144146
if not curr_scheme_data:
@@ -161,9 +163,9 @@ def process_detailed_text(text):
161163
nav=Decimal(m.group(2).replace(",", "_")),
162164
)
163165
continue
164-
if m := re.search(TRANSACTION_RE, line, re.DOTALL):
166+
if m := parse_transaction(line):
165167
date = date_parser.parse(m.group(1)).date()
166-
desc = m.group(2).strip() + description_tail
168+
desc = m.group(2).strip()
167169
amt = Decimal(m.group(3).replace(",", "_").replace("(", "-"))
168170
if m.group(4) is None:
169171
units = None

casparser/process/regex.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""Regular expressions for parsing various sections in CAS."""
22

3+
# Capturing fragment for a date written as two digits, three letters and four
# digits separated by hyphens (e.g. 01-Jan-2021).
date_re = r"(\d{2}-[A-Za-z]{3}-\d{4})"
4+
# Capturing fragment for a numeric column: optional leading "(" / "-" characters,
# one digit, then one or more digits, commas or dots. Any trailing ")" is
# consumed but left outside the capture group.
amt_re = r"([(-]*\d[\d,.]+)\)*"
5+
36
CAS_TYPE_RE = r"consolidated\s+account\s+(statement|summary)"
47
DETAILED_DATE_RE = r"(?P<from>\d{2}-[a-zA-Z]{3}-\d{4})\s+to\s+(?P<to>\d{2}-[a-zA-Z]{3}-\d{4})"
58
SUMMARY_DATE_RE = r"as\s+on\s+(?P<date>\d{2}-[a-zA-Z]{3}-\d{4})"
@@ -21,10 +24,6 @@
2124
VALUATION_RE = r"Valuation\s+on\s+(\d{2}-[A-Za-z]{3}-\d{4})\s*:\s*INR\s*([\d,.]+)"
2225
NAV_RE = r"NAV\s+on\s+(\d{2}-[A-Za-z]{3}-\d{4})\s*:\s*INR\s*([\d,.]+)"
2326

24-
TRANSACTION_RE = (
25-
r"(\d{2}-[A-Za-z]{3}-\d{4})\t\t([^\t]+?)\t\t([(\d,.]+)\)*"
26-
r"(?:\t\t([(\d,.]+)\)*\t\t([(\d,.]+)\)*\t\t([-(\d,.]+)\)*)*"
27-
)
28-
DIVIDEND_RE = r"dividend.+?(reinvest)*.*?@\s+Rs\.\s*([\d\.]+)\s+per\s+unit"
29-
30-
DESCRIPTION_TAIL_RE = r"\d{2}-[A-Za-z]{3}-\d{4}\t\t.*(\n[^\t]+)[\t|$]"
27+
# Transaction row with every column present: date, description (must start with a
# non-digit character) and four numeric columns, each separated by two tabs.
# Groups: 1 = date, 2 = description, 3-6 = numeric columns.
TRANSACTION_RE1 = rf"{date_re}\t\t([^0-9].*)\t\t{amt_re}\t\t{amt_re}\t\t{amt_re}\t\t{amt_re}"
28+
# Looser variant of the transaction row: date, description and one numeric column
# are required; the trailing run of three numeric columns may be absent, in which
# case groups 4-6 are None.
TRANSACTION_RE2 = rf"{date_re}\t\t([^0-9].*)\t\t{amt_re}(?:\t\t{amt_re}\t\t{amt_re}\t\t{amt_re})*"
29+
# Dividend / IDCW description. Group 1 is "reinvest" when captured (else None);
# group 2 is the per-unit rate following "@ Rs.".
# NOTE(review): the wildcards around "(reinvest)*" are lazy, so the group appears to
# capture only when "reinvest" starts exactly one character after the keyword —
# confirm this covers the wording used in real statements.
DIVIDEND_RE = r"(?:dividend|idcw).+?(reinvest)*.*?@\s+Rs\.\s*([\d\.]+)\s+per\s+unit"

casparser/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class SchemeType(TypedDict, total=False):
4242

4343
scheme_id: int
4444
scheme: str
45-
advisor: str
45+
advisor: Optional[str]
4646
rta_code: str
4747
rta: str
4848
isin: Optional[str]

0 commit comments

Comments
 (0)