Skip to content

Commit 32878e0

Browse files
committed
support parsing long scheme names
1 parent ef36eb5 commit 32878e0

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

casparser/process/cas_detailed.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from ..enums import TransactionType, CASFileType
88
from ..exceptions import HeaderParseError, CASParseError
9-
from .regex import DETAILED_DATE_RE, FOLIO_RE, SCHEME_RE
9+
from .regex import DETAILED_DATE_RE, FOLIO_RE, SCHEME_RE, REGISTRAR_RE
1010
from .regex import CLOSE_UNITS_RE, NAV_RE, OPEN_UNITS_RE, VALUATION_RE
1111
from .regex import DESCRIPTION_TAIL_RE, DIVIDEND_RE, TRANSACTION_RE
1212

@@ -80,10 +80,13 @@ def process_detailed_text(text):
8080
current_folio = None
8181
current_amc = None
8282
curr_scheme_data = {}
83-
balance = Decimal(0.0)
8483
lines = text.split("\u2029")
85-
for line in lines:
86-
if m := re.search(DESCRIPTION_TAIL_RE, line, re.I | re.DOTALL):
84+
for idx, line in enumerate(lines):
85+
# A long scheme name pushes the "Registrar" column onto a line of its own,
86+
# ahead of the scheme text; re-join it with the following (scheme) line
87+
if re.search(REGISTRAR_RE, line):
88+
line = "\t\t".join([lines[idx + 1], line])
89+
elif m := re.search(DESCRIPTION_TAIL_RE, line, re.I | re.DOTALL):
8790
description_tail = m.group(1).rstrip()
8891
line = line.replace(description_tail, "")
8992
else:

casparser/process/regex.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
)
1616

1717
SCHEME_RE = r"([\s\w]+)-\s*\d*\s*(.+?)\s*(?:\(Advisor\s*:\s*(.+?)\))*\s+Registrar\s*:\s*(.*)\s*$"
18+
REGISTRAR_RE = r"^\s*Registrar\s*:\s*(.*)\s*$"
1819
OPEN_UNITS_RE = r"Opening\s+Unit\s+Balance.+?([\d,.]+)"
1920
CLOSE_UNITS_RE = r"Closing\s+Unit\s+Balance.+?([\d,.]+)"
2021
VALUATION_RE = r"Valuation\s+on\s+(\d{2}-[A-Za-z]{3}-\d{4})\s*:\s*INR\s*([\d,.]+)"

0 commit comments

Comments
 (0)