Skip to content

Commit 6b54213

Browse files
authored
Fix mypy issues & gh action (#251)
* fix mypy issues: ignore most, fix in beancount_import/source/amazon_invoice.py
* add sudo apt-get update
1 parent 0447ba9 commit 6b54213

File tree

5 files changed

+139
-36
lines changed

5 files changed

+139
-36
lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ jobs:
4141
working-directory: frontend
4242
- name: Install pdftotext (on Linux)
4343
if: ${{ runner.os == 'Linux' }}
44-
run: sudo apt-get install poppler-utils
44+
run: |
45+
sudo apt-get update
46+
sudo apt-get install poppler-utils
4547
- name: Install Python packaging/test tools
4648
run: python -m pip install tox wheel
4749
- name: Show package version

beancount_import/api_proxies/__init__.py

Whitespace-only changes.

beancount_import/api_proxies/beautifulsoup.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from bs4.element import Tag, PageElement, _FindMethodName, NavigableString
2+
3+
from typing import Optional, Any, Union, cast, Pattern, Callable, Iterable
4+
from bs4._typing import (
5+
_OneElement,
6+
_StrainableAttribute,
7+
_StrainableAttributes,
8+
_StrainableString,
9+
)
10+
11+
#
12+
#
13+
def require_find(
14+
tag: Tag,
15+
name: _FindMethodName = None,
16+
attrs: _StrainableAttributes = {},
17+
recursive: bool = True,
18+
string: Optional[_StrainableString] = None,
19+
**kwargs: _StrainableAttribute,
20+
) -> _OneElement:
21+
22+
"""
23+
wrapper for Tag.find() that behaves like find() but raises if element is not found.
24+
Assumes the result is always a Tag (not NavigableString).
25+
"""
26+
result = tag.find(
27+
name=name,
28+
attrs=attrs,
29+
recursive=recursive,
30+
string=string,
31+
**kwargs,
32+
)
33+
if result is None or not isinstance(result, Tag):
34+
raise ValueError(f"require_find: Element not found: {name!r}, {attrs!r}, {kwargs!r}")
35+
return cast(Tag, result)
36+
37+
def require_find_parent(
38+
page_element: PageElement,
39+
name: _FindMethodName = None,
40+
attrs: _StrainableAttributes = {},
41+
**kwargs: _StrainableAttribute,
42+
) -> Tag:
43+
44+
result = page_element.find_parent(
45+
name=name,
46+
attrs=attrs,
47+
**kwargs,
48+
)
49+
if result is None or not isinstance(result, Tag):
50+
raise ValueError(
51+
f"require_find_parent: Element not found: {name!r}, {attrs!r}, {kwargs!r}")
52+
return cast(Tag, result)

beancount_import/source/amazon_invoice.py

Lines changed: 66 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,17 @@
2929
|
3030
+-> returns Order
3131
"""
32-
from typing import NamedTuple, Optional, List, Union, Iterable, Dict, Sequence, cast, Type
32+
from typing import (
33+
NamedTuple,
34+
Optional,
35+
List,
36+
Union,
37+
Iterable,
38+
Dict,
39+
Sequence,
40+
cast,
41+
Type,
42+
Callable )
3343
from abc import ABC, abstractmethod
3444
import collections
3545
import re
@@ -39,10 +49,14 @@
3949
import logging
4050

4151
import bs4
52+
from bs4 import BeautifulSoup, Tag, ResultSet, PageElement
53+
54+
from beancount_import.api_proxies.beautifulsoup import require_find, require_find_parent
4255
import dateutil.parser
4356
import beancount.core.amount
4457
from beancount.core.amount import Amount
45-
from beancount.core.number import D, ZERO, Decimal
58+
from beancount.core.number import D, ZERO
59+
from decimal import Decimal
4660

4761
from ..amount_parsing import parse_amount, parse_number
4862

@@ -217,7 +231,7 @@ class Locale_de_DE(Locale_Data):
217231
# 'Extra Savings', '(?:.*) Discount', 'Gift[ -]Wrap',
218232
]) + ') *:')
219233
# most adjustments in DE are posttax:
220-
posttax_adjustment_fields_pattern='Gutschein eingelöst:|Geschenkgutschein\(e\):'
234+
posttax_adjustment_fields_pattern=r'Gutschein eingelöst:|Geschenkgutschein\(e\):'
221235

222236
# Payment Table & Credit Card Transactions
223237
grand_total=r'\n\s*(?:Gesamtsumme|Endsumme):\s+(.*)\n' # regular: Gesamtsumme, digital: Endsumme
@@ -367,11 +381,21 @@ def add_amount(a: Optional[Amount], b: Optional[Amount]) -> Optional[Amount]:
367381
return a
368382
return beancount.core.amount.add(a, b)
369383

384+
def reduce_amounts_may_return_none(amounts: Iterable[Amount]) -> Optional[Amount]:
385+
return functools.reduce(add_amount, amounts, None)
386+
370387

371-
def reduce_amounts(amounts: Iterable[Amount]) -> Optional[Amount]:
388+
def reduce_amounts(amounts: Iterable[Amount]) -> Amount:
372389
"""Reduce iterable of amounts to sum by applying `add_amount`.
373390
"""
374-
return functools.reduce(add_amount, amounts, None)
391+
392+
reduced_amounts = functools.reduce(add_amount, amounts, None)
393+
if reduced_amounts is None:
394+
raise ValueError("amount iterable is empty, must be checked before reducing.")
395+
else:
396+
return reduced_amounts
397+
398+
375399

376400

377401
def get_field_in_table(table, pattern, allow_multiple=False,
@@ -639,7 +663,7 @@ def parse_shipment_payments(
639663
items_subtotal = locale.parse_amount(
640664
get_field_in_table(shipment_table, locale.items_subtotal))
641665

642-
expected_items_subtotal = reduce_amounts(
666+
expected_items_subtotal = reduce_amounts_may_return_none(
643667
beancount.core.amount.mul(x.price, D(x.quantity)) for x in items)
644668
if (items_subtotal is not None and
645669
expected_items_subtotal != items_subtotal):
@@ -784,13 +808,17 @@ def parse_regular_order_invoice(path: str, locale=Locale_en_US) -> Order:
784808
"""
785809
errors = [] # type: Errors
786810
with open(path, 'rb') as f:
787-
soup = bs4.BeautifulSoup(f.read(), 'lxml')
811+
soup: BeautifulSoup = bs4.BeautifulSoup(f.read(), 'lxml')
788812

789813
# -----------------
790814
# Order ID & Order placed date
791815
# -----------------
792816
logger.debug('parsing order id and order placed date...')
793-
title = soup.find('title').text.strip()
817+
title_element = soup.find('title')
818+
if title_element is None:
819+
raise ValueError("soup.find('title') returned None")
820+
else:
821+
title = title_element.text.strip()
794822
m = re.fullmatch(locale.regular_order_id, title.strip())
795823
assert m is not None
796824
order_id=m.group(1)
@@ -799,7 +827,7 @@ def is_order_placed_node(node):
799827
m = re.fullmatch(locale.regular_order_placed, node.text.strip())
800828
return m is not None
801829

802-
node = soup.find(is_order_placed_node)
830+
node = require_find(soup,is_order_placed_node)
803831
m = re.fullmatch(locale.regular_order_placed, node.text.strip())
804832
assert m is not None
805833
order_date = locale.parse_date(m.group(1))
@@ -827,9 +855,10 @@ def is_order_placed_node(node):
827855
# Aim: Parse all pre- and posttax adjustments
828856
# consistency check grand total against sum of item costs
829857
logger.debug('parsing payment table...')
830-
payment_table_header = soup.find(
831-
lambda node: node.name == 'table' and re.match(
832-
locale.payment_information, node.text.strip()))
858+
859+
matcher : Callable[[Tag], bool] = lambda node: node.name == 'table' and re.match(
860+
locale.payment_information, node.text.strip()) is not None
861+
payment_table_header = require_find(soup, matcher)
833862

834863
payment_table = payment_table_header.find_parent('table')
835864

@@ -843,7 +872,7 @@ def is_order_placed_node(node):
843872
# detect which this is
844873

845874
# payment table pretax adjustments
846-
pretax_amount = reduce_amounts(
875+
pretax_amount = reduce_amounts_may_return_none(
847876
a.amount for a in output_fields['pretax_adjustments'])
848877

849878
shipments_pretax_amount = None
@@ -919,25 +948,26 @@ def resolve_posttax_adjustments() -> List[Adjustment]:
919948
get_field_in_table(payment_table, locale.regular_estimated_tax))
920949

921950
# tax from shipment tables
922-
expected_tax = reduce_amounts(
923-
a.amount for shipment in shipments for a in shipment.tax)
924-
if expected_tax is None:
951+
shipment_amounts = [a.amount for shipment in shipments for a in shipment.tax]
952+
if len(shipment_amounts) == 0:
925953
# tax not given on shipment level
926954
if not locale.tax_included_in_price:
927955
# add tax to adjustments if not already included in item prices
928956
shipments_total_adjustments.append(tax)
929-
elif expected_tax != tax:
930-
errors.append(
931-
'expected tax is %s, but parsed value is %s' % (expected_tax, tax))
957+
else:
958+
expected_tax = reduce_amounts(shipment_amounts)
959+
if expected_tax != tax:
960+
errors.append(
961+
'expected tax is %s, but parsed value is %s' % (expected_tax, tax))
932962

933963
if locale.tax_included_in_price:
934964
# tax is already included in item prices
935965
# do not add additional transaction for taxes
936966
tax = None
937967

938968
logger.debug('consistency check grand total...')
939-
payments_total_adjustment = reduce_amounts(payments_total_adjustments)
940-
shipments_total_adjustment = reduce_amounts(shipments_total_adjustments)
969+
payments_total_adjustment = reduce_amounts_may_return_none(payments_total_adjustments)
970+
shipments_total_adjustment = reduce_amounts_may_return_none(shipments_total_adjustments)
941971

942972
expected_total = add_amount(shipments_total_adjustment,
943973
reduce_amounts(x.total for x in shipments))
@@ -1032,8 +1062,8 @@ def is_digital_order_row(node):
10321062
except:
10331063
return False
10341064

1035-
digital_order_header = soup.find(is_digital_order_row)
1036-
digital_order_table = digital_order_header.find_parent('table')
1065+
digital_order_header = require_find(soup, is_digital_order_row)
1066+
digital_order_table : Tag = require_find_parent(digital_order_header, 'table')
10371067
m = re.match(locale.digital_order, digital_order_header.text.strip())
10381068
if m is None:
10391069
msg = ('Identified digital order invoice but no digital orders were found.')
@@ -1043,10 +1073,9 @@ def is_digital_order_row(node):
10431073
assert m is not None
10441074
order_date = locale.parse_date(m.group(1))
10451075

1046-
order_id_td = soup.find(
1047-
lambda node: node.name == 'td' and
1048-
re.match(locale.digital_order_id, node.text.strip())
1049-
)
1076+
matcher: Callable[[Tag], bool] = lambda node: node.name == 'td' and re.match(locale.digital_order_id, node.text.strip()) is not None
1077+
1078+
order_id_td = require_find(soup, matcher)
10501079
m = re.match(locale.digital_order_id, order_id_td.text.strip())
10511080
assert m is not None
10521081
order_id = m.group(1)
@@ -1055,28 +1084,30 @@ def is_digital_order_row(node):
10551084
# Parse Items
10561085
# -----------
10571086
logger.debug('parsing items...')
1058-
items_ordered_header = digital_order_table.find(
1087+
items_ordered_header = require_find(digital_order_table,
10591088
lambda node: is_items_ordered_header(node, locale))
1060-
item_rows = items_ordered_header.find_next_siblings('tr')
1061-
1089+
item_rows_raw : ResultSet[PageElement] = items_ordered_header.find_next_siblings('tr')
1090+
# the find_all below needs them to be the narrower type Tag, so cast right away
1091+
item_rows : ResultSet[Tag] = cast(ResultSet[Tag], item_rows_raw)
1092+
10621093
items = [] # Sequence[DigitalItem]
10631094
other_fields_td = None
10641095

10651096
for item_row in item_rows:
1066-
tds = item_row('td')
1097+
tds = item_row.find_all('td')
10671098
if len(tds) != 2:
10681099
# payment information on order level (not payment table)
10691100
# differently formatted, take first column only
10701101
other_fields_td = tds[0]
10711102
continue
1072-
description_node = tds[0]
1103+
description_node : Tag = cast(Tag, tds[0])
10731104
price_node = tds[1]
10741105
price = price_node.text.strip()
10751106

1076-
a = description_node.find('a')
1107+
a = cast(Tag,description_node.find('a'))
10771108
if a is not None:
10781109
description = a.text.strip()
1079-
url = a['href']
1110+
url = cast(str,a['href'])
10801111
else:
10811112
bold_node = description_node.find('b')
10821113
description = bold_node.text.strip()
@@ -1156,7 +1187,7 @@ def get_amounts_in_text(pattern_map):
11561187
locale.pretax_adjustment_fields_pattern)
11571188
pretax_parts = ([items_subtotal] +
11581189
[a.amount for a in output_fields['pretax_adjustments']])
1159-
expected_total_before_tax = reduce_amounts(pretax_parts)
1190+
expected_total_before_tax = reduce_amounts_may_return_none(pretax_parts)
11601191
if expected_total_before_tax != total_before_tax:
11611192
errors.append('expected total before tax is %s, but parsed value is %s'
11621193
% (expected_total_before_tax, total_before_tax))

mypy.ini

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
11
[mypy]
22
warn_unused_configs = True
33
ignore_missing_imports = True
4+
5+
# modules with note
6+
# note: By default the bodies of untyped functions are not checked, consider using --check-untyped-defs [annotation-unchecked]
7+
8+
9+
# modules with errors
10+
[mypy-beancount_import/source/stockplanconnect.*]
11+
ignore_errors = True
12+
[mypy-beancount_import/source/amazon_invoice_sanitize.*]
13+
ignore_errors = True
14+
#[mypy-beancount_import/source/amazon_invoice.*]
15+
#ignore_errors = True
16+
[mypy-beancount_import/source/schwab_csv.*]
17+
ignore_errors = True
18+
[mypy-beancount_import/source/ofx.*]
19+
ignore_errors = True
20+
[mypy-beancount_import/source/amazon.*]
21+
ignore_errors = True

0 commit comments

Comments
 (0)