88from ..exceptions import HeaderParseError , CASParseError
99from .regex import DETAILED_DATE_RE , FOLIO_RE , SCHEME_RE , REGISTRAR_RE
1010from .regex import CLOSE_UNITS_RE , NAV_RE , OPEN_UNITS_RE , VALUATION_RE
11- from .regex import DESCRIPTION_TAIL_RE , DIVIDEND_RE , TRANSACTION_RE
12- from ..types import FolioType
11+ from .regex import DIVIDEND_RE , TRANSACTION_RE1 , TRANSACTION_RE2
12+ from ..types import FolioType , SchemeType
1313from .utils import isin_search
1414
1515
@@ -31,7 +31,8 @@ def get_transaction_type(
3131 dividend_rate = None
3232 description = description .lower ()
3333 if div_match := re .search (DIVIDEND_RE , description , re .I | re .DOTALL ):
34- reinvest_flag , dividend_rate = div_match .groups ()
34+ reinvest_flag , dividend_str = div_match .groups ()
35+ dividend_rate = Decimal (dividend_str )
3536 txn_type = (
3637 TransactionType .DIVIDEND_REINVEST if reinvest_flag else TransactionType .DIVIDEND_PAYOUT
3738 )
@@ -73,6 +74,12 @@ def get_transaction_type(
7374 return txn_type , dividend_rate
7475
7576
def parse_transaction(line):
    """
    Match a single line of CAS text against the known transaction patterns.

    The patterns are tried in order (``TRANSACTION_RE1`` first, then
    ``TRANSACTION_RE2``) using ``re.search`` with the DOTALL, MULTILINE and
    IGNORECASE flags. The first pattern that matches wins.

    :param line: Text line to be tested for a transaction entry.
    :return: The ``re.Match`` object of the first matching pattern, or
        ``None`` when neither pattern matches.
    """
    search_flags = re.DOTALL | re.MULTILINE | re.I
    for pattern in (TRANSACTION_RE1, TRANSACTION_RE2):
        found = re.search(pattern, line, search_flags)
        if found:
            return found
    # No pattern matched: the line is not a transaction row.
    return None
81+
82+
7683def process_detailed_text (text ):
7784 """
7885 Process the text version of a CAS pdf and return the detailed summary.
@@ -93,11 +100,6 @@ def process_detailed_text(text):
93100 # "Registrar" column to the previous line
94101 if re .search (REGISTRAR_RE , line ):
95102 line = "\t \t " .join ([lines [idx + 1 ], line ])
96- if m := re .search (DESCRIPTION_TAIL_RE , line , re .I | re .DOTALL ):
97- description_tail = m .group (1 ).rstrip ()
98- line = line .replace (description_tail , "" )
99- else :
100- description_tail = ""
101103 if amc_match := re .search (r"^(.+?)\s+(MF|Mutual\s+Fund)$" , line , re .I | re .DOTALL ):
102104 current_amc = amc_match .group (0 )
103105 elif m := re .search (FOLIO_RE , line , re .I | re .DOTALL ):
@@ -128,7 +130,7 @@ def process_detailed_text(text):
128130 rta = m .group (4 ).strip ()
129131 rta_code = m .group (1 ).strip ()
130132 isin , amfi = isin_search (scheme , rta , rta_code )
131- curr_scheme_data = {
133+ curr_scheme_data : SchemeType = {
132134 "scheme" : scheme ,
133135 "advisor" : advisor ,
134136 "rta_code" : rta_code ,
@@ -138,7 +140,7 @@ def process_detailed_text(text):
138140 "open" : Decimal (0.0 ),
139141 "close" : Decimal (0.0 ),
140142 "close_calculated" : Decimal (0.0 ),
141- "valuation" : {"date" : None , "value" : 0 , "nav" : 0 },
143+ "valuation" : {"date" : None , "value" : Decimal ( 0.0 ) , "nav" : Decimal ( 0.0 ) },
142144 "transactions" : [],
143145 }
144146 if not curr_scheme_data :
@@ -161,9 +163,9 @@ def process_detailed_text(text):
161163 nav = Decimal (m .group (2 ).replace ("," , "_" )),
162164 )
163165 continue
164- if m := re . search ( TRANSACTION_RE , line , re . DOTALL ):
166+ if m := parse_transaction ( line ):
165167 date = date_parser .parse (m .group (1 )).date ()
166- desc = m .group (2 ).strip () + description_tail
168+ desc = m .group (2 ).strip ()
167169 amt = Decimal (m .group (3 ).replace ("," , "_" ).replace ("(" , "-" ))
168170 if m .group (4 ) is None :
169171 units = None
0 commit comments