Skip to content

Commit 6bf0127

Browse files
committed
summary parser: fix bugs with pdfminer backend and update test cases
1 parent 8470e6e commit 6bf0127

File tree

3 files changed

+14
-15
lines changed

3 files changed

+14
-15
lines changed

casparser/process/cas_summary.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,11 @@ def process_summary_text(text):
4343
for line in lines:
4444
if len(folios) > 0 and re.search("Total", line, re.I):
4545
break
46-
scheme_tail = ""
47-
if m := re.search(SCHEME_TAIL_RE, line, re.DOTALL | re.MULTILINE):
48-
scheme_tail = m.group(1).strip()
49-
line = line.replace(scheme_tail, "")
50-
scheme_tail = re.sub(r"\s+", " ", scheme_tail).strip()
46+
scheme_tails = []
47+
if m := re.findall(SCHEME_TAIL_RE, line):
48+
for txt in m:
49+
line = line.replace(txt, "")
50+
scheme_tails.append(re.sub(r"\s+", " ", txt).strip())
5151
if m := re.search(SUMMARY_ROW_RE, line, re.DOTALL | re.MULTILINE | re.I):
5252
folio = m.group("folio").strip()
5353
if current_folio is None or current_folio != folio:
@@ -61,8 +61,8 @@ def process_summary_text(text):
6161
schemes=[],
6262
)
6363
scheme = m.group("name")
64-
if scheme_tail != "":
65-
scheme = " ".join([scheme, scheme_tail])
64+
if len(scheme_tails) > 0:
65+
scheme = " ".join([scheme, *scheme_tails])
6666
scheme = re.sub(r"\(formerly.+?\)", "", scheme, flags=re.I | re.DOTALL).strip()
6767
rta = m.group("rta").strip()
6868
rta_code = m.group("code").strip()

casparser/process/regex.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,4 +44,4 @@
4444
TRANSACTION_RE3 = rf"{date_re}\t\t([^0-9].*)\t\t{amt_re}(?:\t\t{amt_re}\t\t{amt_re}\t\t{amt_re})*"
4545
DESCRIPTION_TAIL_RE = r"(\n.+?)(\t\t|$)"
4646
DIVIDEND_RE = r"(?:div\.|dividend|idcw).+?(reinvest)*.*?@\s*Rs\.\s*([\d\.]+)(?:\s+per\s+unit)?"
47-
SCHEME_TAIL_RE = r"(\n.+)$"
47+
SCHEME_TAIL_RE = r"(\n.+?)(?:\t\t|$)"

tests/base.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -56,20 +56,19 @@ def test_output_json(self):
5656

5757
def test_read_summary(self):
5858
summary_files = (
59-
(self.cams_summary_file_name, self.cams_password, 4),
60-
(
61-
self.kfintech_summary_file_name,
62-
self.kfintech_password,
63-
9,
64-
),
59+
(self.cams_summary_file_name, self.cams_password, 4, 6),
60+
(self.kfintech_summary_file_name, self.kfintech_password, 9, 13),
6561
)
66-
for filename, password, num_folios in summary_files:
62+
for filename, password, num_folios, num_schemes in summary_files:
6763
data = self.read_pdf(filename, password)
6864
assert len(data.folios) == num_folios
65+
schemes_found = 0
6966
for folio in data.folios:
67+
schemes_found += len(folio.schemes)
7068
for scheme in folio.schemes:
7169
assert scheme.isin is not None
7270
assert scheme.amfi is not None
71+
assert schemes_found == num_schemes
7372
assert data.investor_info.mobile not in (None, "")
7473
assert data.cas_type == CASFileType.SUMMARY.value
7574

0 commit comments

Comments
 (0)