Skip to content

Commit 3aa3053

Browse files
committed
fix PAN parser + add nominee parser
1 parent ee7a60c commit 3aa3053

File tree

4 files changed

+27
-8
lines changed

4 files changed

+27
-8
lines changed

casparser/process/cas_detailed.py

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,9 @@
2424
DETAILED_DATE_RE,
2525
DIVIDEND_RE,
2626
FOLIO_RE,
27+
FOLIO_KV_RE,
2728
NAV_RE,
29+
NOMINEE_RE,
2830
OPEN_UNITS_RE,
2931
REGISTRAR_RE,
3032
SCHEME_RE,
@@ -155,20 +157,33 @@ def process_detailed_text(text):
155157
line = "\t\t".join([lines[idx + 1], line])
156158
if amc_match := re.search(AMC_RE, line, re.I | re.DOTALL):
157159
current_amc = amc_match.group(0)
158-
elif m := re.search(FOLIO_RE, line, re.I | re.DOTALL):
160+
elif m := re.search(FOLIO_RE, line):
159161
folio = m.group(1).strip()
160162
if current_folio is None or current_folio != folio:
161163
if curr_scheme_data and current_folio is not None:
162164
folios[current_folio].schemes.append(curr_scheme_data)
163165
curr_scheme_data = None
164166
current_folio = folio
167+
168+
pan = ""
169+
kyc = None
170+
pankyc = None
171+
for k, v in re.findall(FOLIO_KV_RE, line):
172+
v = v.strip()
173+
if k == "KYC":
174+
kyc = v
175+
elif len(v) == 10:
176+
pan = v
177+
else:
178+
pankyc = v
179+
165180
if folio not in folios:
166181
folios[folio] = Folio(
167182
folio=current_folio,
168183
amc=current_amc,
169-
PAN=(m.group(2) or "").strip(),
170-
KYC=None if m.group(3) is None else m.group(3).strip(),
171-
PANKYC=None if m.group(4) is None else m.group(4).strip(),
184+
PAN=pan,
185+
KYC=kyc,
186+
PANKYC=pankyc,
172187
schemes=[],
173188
)
174189
elif m := re.search(SCHEME_RE, line, re.DOTALL | re.MULTILINE | re.I):
@@ -207,6 +222,8 @@ def process_detailed_text(text):
207222
)
208223
if not curr_scheme_data:
209224
continue
225+
if m := re.search(NOMINEE_RE, line, re.I | re.DOTALL):
226+
curr_scheme_data.nominees.extend([x.strip() for x in m.groups() if x.strip()])
210227
if m := re.search(OPEN_UNITS_RE, line):
211228
curr_scheme_data.open = Decimal(m.group(1).replace(",", "_"))
212229
curr_scheme_data.close_calculated = curr_scheme_data.open

casparser/process/regex.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,10 @@
1717
SCHEME_TAIL_RE = r"(\n.+?)\t\t"
1818

1919
AMC_RE = r"^(.+?\s+(MF|Mutual\s*Fund)|franklin\s+templeton\s+investments)$"
20-
FOLIO_RE = (
21-
r"Folio\s+No\s*:\s+([\d/\s]+)\s*.*?(?:PAN\s*:\s*([A-Z]{5}\d{4}[A-Z])\s+)?.*?"
22-
r"(?:KYC\s*:\s*(OK|NOT\s+OK))?\s*.*?(?:PAN\s*:\s*(OK|NOT\s+OK))?$"
23-
)
20+
FOLIO_RE = r"^Folio\s+No\s*:\s+([\d/\s]+\d)\s"
21+
FOLIO_KV_RE = r"(PAN|KYC)\s*:\s*([A-Z]{5}\d{4}[A-Z]|OK|NOT OK)"
22+
23+
NOMINEE_RE = r"\s*Nominee\s+[1-3]\s*:\s*(.*?)" * 3 + r"$"
2424

2525
SCHEME_RE = (
2626
r"(?P<code>[\s\w]+-*[gdp]?)-\s*\d*\s*(?P<name>.+?)(?:\t\t|\(|ISIN).*?"

casparser/types.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ class Scheme(BaseModel):
5454
type: Optional[str] = None
5555
isin: Optional[str] = None
5656
amfi: Optional[str] = None
57+
nominees: List[str] = []
5758
open: Union[Decimal, float]
5859
close: Union[Decimal, float]
5960
close_calculated: Union[Decimal, float]

tests/base.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ def test_output_json(self):
4848
len(data.get("folios", [])) == num_folios
4949
), f"Expected : {num_folios} :: Got {len(data.get('folios', []))}"
5050
for folio in data["folios"]:
51+
assert isinstance(folio["PAN"], str) and len(folio["PAN"]) == 10
5152
for scheme in folio.get("schemes", []):
5253
assert scheme["isin"] is not None
5354
assert scheme["amfi"] is not None

0 commit comments

Comments
 (0)