2929 |
3030 +-> returns Order
3131"""
32- from typing import NamedTuple , Optional , List , Union , Iterable , Dict , Sequence , cast , Type
32+ from typing import (
33+ NamedTuple ,
34+ Optional ,
35+ List ,
36+ Union ,
37+ Iterable ,
38+ Dict ,
39+ Sequence ,
40+ cast ,
41+ Type ,
42+ Callable )
3343from abc import ABC , abstractmethod
3444import collections
3545import re
3949import logging
4050
4151import bs4
52+ from bs4 import BeautifulSoup , Tag , ResultSet , PageElement
53+
54+ from beancount_import .api_proxies .beautifulsoup import require_find , require_find_parent
4255import dateutil .parser
4356import beancount .core .amount
4457from beancount .core .amount import Amount
45- from beancount .core .number import D , ZERO , Decimal
58+ from beancount .core .number import D , ZERO
59+ from decimal import Decimal
4660
4761from ..amount_parsing import parse_amount , parse_number
4862
@@ -217,7 +231,7 @@ class Locale_de_DE(Locale_Data):
217231 # 'Extra Savings', '(?:.*) Discount', 'Gift[ -]Wrap',
218232 ]) + ') *:' )
219233 # most adjustments in DE are posttax:
220- posttax_adjustment_fields_pattern = 'Gutschein eingelöst:|Geschenkgutschein\(e\):'
234+ posttax_adjustment_fields_pattern = r 'Gutschein eingelöst:|Geschenkgutschein\(e\):'
221235
222236 # Payment Table & Credit Card Transactions
223237 grand_total = r'\n\s*(?:Gesamtsumme|Endsumme):\s+(.*)\n' # regular: Gesamtsumme, digital: Endsumme
@@ -367,11 +381,21 @@ def add_amount(a: Optional[Amount], b: Optional[Amount]) -> Optional[Amount]:
367381 return a
368382 return beancount .core .amount .add (a , b )
369383
384+ def reduce_amounts_may_return_none (amounts : Iterable [Amount ]) -> Optional [Amount ]:
385+ return functools .reduce (add_amount , amounts , None )
386+
370387
371- def reduce_amounts (amounts : Iterable [Amount ]) -> Optional [ Amount ] :
388+ def reduce_amounts (amounts : Iterable [Amount ]) -> Amount :
372389 """Reduce iterable of amounts to sum by applying `add_amount`.
373390 """
374- return functools .reduce (add_amount , amounts , None )
391+
392+ reduced_amounts = functools .reduce (add_amount , amounts , None )
393+ if reduced_amounts is None :
394+ raise ValueError ("amount iterable is empty, must be checked before reducing." )
395+ else :
396+ return reduced_amounts
397+
398+
375399
376400
377401def get_field_in_table (table , pattern , allow_multiple = False ,
@@ -639,7 +663,7 @@ def parse_shipment_payments(
639663 items_subtotal = locale .parse_amount (
640664 get_field_in_table (shipment_table , locale .items_subtotal ))
641665
642- expected_items_subtotal = reduce_amounts (
666+ expected_items_subtotal = reduce_amounts_may_return_none (
643667 beancount .core .amount .mul (x .price , D (x .quantity )) for x in items )
644668 if (items_subtotal is not None and
645669 expected_items_subtotal != items_subtotal ):
@@ -784,13 +808,17 @@ def parse_regular_order_invoice(path: str, locale=Locale_en_US) -> Order:
784808 """
785809 errors = [] # type: Errors
786810 with open (path , 'rb' ) as f :
787- soup = bs4 .BeautifulSoup (f .read (), 'lxml' )
811+ soup : BeautifulSoup = bs4 .BeautifulSoup (f .read (), 'lxml' )
788812
789813 # -----------------
790814 # Order ID & Order placed date
791815 # -----------------
792816 logger .debug ('parsing order id and order placed date...' )
793- title = soup .find ('title' ).text .strip ()
817+ title_element = soup .find ('title' )
818+ if title_element is None :
819+ raise ValueError ("soup.find('title') returned None" )
820+ else :
821+ title = title_element .text .strip ()
794822 m = re .fullmatch (locale .regular_order_id , title .strip ())
795823 assert m is not None
796824 order_id = m .group (1 )
@@ -799,7 +827,7 @@ def is_order_placed_node(node):
799827 m = re .fullmatch (locale .regular_order_placed , node .text .strip ())
800828 return m is not None
801829
802- node = soup . find ( is_order_placed_node )
830+ node = require_find ( soup , is_order_placed_node )
803831 m = re .fullmatch (locale .regular_order_placed , node .text .strip ())
804832 assert m is not None
805833 order_date = locale .parse_date (m .group (1 ))
@@ -827,9 +855,10 @@ def is_order_placed_node(node):
827855 # Aim: Parse all pre- and posttax adjustments
828856 # consistency check grand total against sum of item costs
829857 logger .debug ('parsing payment table...' )
830- payment_table_header = soup .find (
831- lambda node : node .name == 'table' and re .match (
832- locale .payment_information , node .text .strip ()))
858+
859+ matcher : Callable [[Tag ], bool ] = lambda node : node .name == 'table' and re .match (
860+ locale .payment_information , node .text .strip ()) is not None
861+ payment_table_header = require_find (soup , matcher )
833862
834863 payment_table = payment_table_header .find_parent ('table' )
835864
@@ -843,7 +872,7 @@ def is_order_placed_node(node):
843872 # detect which this is
844873
845874 # payment table pretax adjustments
846- pretax_amount = reduce_amounts (
875+ pretax_amount = reduce_amounts_may_return_none (
847876 a .amount for a in output_fields ['pretax_adjustments' ])
848877
849878 shipments_pretax_amount = None
@@ -919,25 +948,26 @@ def resolve_posttax_adjustments() -> List[Adjustment]:
919948 get_field_in_table (payment_table , locale .regular_estimated_tax ))
920949
921950 # tax from shipment tables
922- expected_tax = reduce_amounts (
923- a .amount for shipment in shipments for a in shipment .tax )
924- if expected_tax is None :
951+ shipment_amounts = [a .amount for shipment in shipments for a in shipment .tax ]
952+ if len (shipment_amounts ) == 0 :
925953 # tax not given on shipment level
926954 if not locale .tax_included_in_price :
927955 # add tax to adjustments if not already included in item prices
928956 shipments_total_adjustments .append (tax )
929- elif expected_tax != tax :
930- errors .append (
931- 'expected tax is %s, but parsed value is %s' % (expected_tax , tax ))
957+ else :
958+ expected_tax = reduce_amounts (shipment_amounts )
959+ if expected_tax != tax :
960+ errors .append (
961+ 'expected tax is %s, but parsed value is %s' % (expected_tax , tax ))
932962
933963 if locale .tax_included_in_price :
934964 # tax is already included in item prices
935965 # do not add additional transaction for taxes
936966 tax = None
937967
938968 logger .debug ('consistency check grand total...' )
939- payments_total_adjustment = reduce_amounts (payments_total_adjustments )
940- shipments_total_adjustment = reduce_amounts (shipments_total_adjustments )
969+ payments_total_adjustment = reduce_amounts_may_return_none (payments_total_adjustments )
970+ shipments_total_adjustment = reduce_amounts_may_return_none (shipments_total_adjustments )
941971
942972 expected_total = add_amount (shipments_total_adjustment ,
943973 reduce_amounts (x .total for x in shipments ))
@@ -1032,8 +1062,8 @@ def is_digital_order_row(node):
10321062 except :
10331063 return False
10341064
1035- digital_order_header = soup . find ( is_digital_order_row )
1036- digital_order_table = digital_order_header . find_parent ( 'table' )
1065+ digital_order_header = require_find ( soup , is_digital_order_row )
1066+ digital_order_table : Tag = require_find_parent ( digital_order_header , 'table' )
10371067 m = re .match (locale .digital_order , digital_order_header .text .strip ())
10381068 if m is None :
10391069 msg = ('Identified digital order invoice but no digital orders were found.' )
@@ -1043,10 +1073,9 @@ def is_digital_order_row(node):
10431073 assert m is not None
10441074 order_date = locale .parse_date (m .group (1 ))
10451075
1046- order_id_td = soup .find (
1047- lambda node : node .name == 'td' and
1048- re .match (locale .digital_order_id , node .text .strip ())
1049- )
1076+ matcher : Callable [[Tag ], bool ] = lambda node : node .name == 'td' and re .match (locale .digital_order_id , node .text .strip ()) is not None
1077+
1078+ order_id_td = require_find (soup , matcher )
10501079 m = re .match (locale .digital_order_id , order_id_td .text .strip ())
10511080 assert m is not None
10521081 order_id = m .group (1 )
@@ -1055,28 +1084,30 @@ def is_digital_order_row(node):
10551084 # Parse Items
10561085 # -----------
10571086 logger .debug ('parsing items...' )
1058- items_ordered_header = digital_order_table . find (
1087+ items_ordered_header = require_find ( digital_order_table ,
10591088 lambda node : is_items_ordered_header (node , locale ))
1060- item_rows = items_ordered_header .find_next_siblings ('tr' )
1061-
1089+ item_rows_raw : ResultSet [PageElement ] = items_ordered_header .find_next_siblings ('tr' )
1090+ # the find_all below needs them to be the narrower type Tag, so cast right away
1091+ item_rows : ResultSet [Tag ] = cast (ResultSet [Tag ], item_rows_raw )
1092+
10621093 items = [] # Sequence[DigitalItem]
10631094 other_fields_td = None
10641095
10651096 for item_row in item_rows :
1066- tds = item_row ('td' )
1097+ tds = item_row . find_all ('td' )
10671098 if len (tds ) != 2 :
10681099 # payment information on order level (not payment table)
10691100 # differently formatted, take first column only
10701101 other_fields_td = tds [0 ]
10711102 continue
1072- description_node = tds [0 ]
1103+ description_node : Tag = cast ( Tag , tds [0 ])
10731104 price_node = tds [1 ]
10741105 price = price_node .text .strip ()
10751106
1076- a = description_node .find ('a' )
1107+ a = cast ( Tag , description_node .find ('a' ) )
10771108 if a is not None :
10781109 description = a .text .strip ()
1079- url = a ['href' ]
1110+ url = cast ( str , a ['href' ])
10801111 else :
10811112 bold_node = description_node .find ('b' )
10821113 description = bold_node .text .strip ()
@@ -1156,7 +1187,7 @@ def get_amounts_in_text(pattern_map):
11561187 locale .pretax_adjustment_fields_pattern )
11571188 pretax_parts = ([items_subtotal ] +
11581189 [a .amount for a in output_fields ['pretax_adjustments' ]])
1159- expected_total_before_tax = reduce_amounts (pretax_parts )
1190+ expected_total_before_tax = reduce_amounts_may_return_none (pretax_parts )
11601191 if expected_total_before_tax != total_before_tax :
11611192 errors .append ('expected total before tax is %s, but parsed value is %s'
11621193 % (expected_total_before_tax , total_before_tax ))
0 commit comments