Skip to content

Commit 64eebdc

Browse files
authored
Merge pull request #126 from vnepomuceno/fix/refactor-restaurant-filters
Refactor restaurant filters
2 parents 5e403d5 + 84ebffc commit 64eebdc

File tree

6 files changed

+119
-64
lines changed

6 files changed

+119
-64
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ auth
44
**/output
55
**/properties.yml
66
**/__pycache__
7-
.env
7+
.env

gmail_fisher/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import logging
2+
import os
23

34
import coloredlogs
45

56
from gmail_fisher.utils.config import LOG_LEVEL, LOG_FORMAT
67

78

9+
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
10+
11+
812
def get_logger(name: str) -> logging.Logger:
913
coloredlogs.install()
1014
custom_logger = logging.getLogger(name)

gmail_fisher/parsers/food.py

Lines changed: 30 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import json
2+
import os
23
import re
34
from abc import ABC, abstractmethod
45
from pathlib import Path
5-
from typing import Iterable, Optional, Final, Dict
6+
from typing import Iterable, Optional, Final
67

78
import html2text as html2text
89
from alive_progress import alive_bar
910

10-
from gmail_fisher import get_logger
11+
from gmail_fisher import get_logger, ROOT_DIR
1112
from gmail_fisher.api.gateway import GmailGateway
1213
from gmail_fisher.data.models import (
1314
GmailMessage,
@@ -16,10 +17,18 @@
1617
FoodExpense,
1718
)
1819
from gmail_fisher.parsers import print_header
20+
from gmail_fisher.utils.json_utils import JsonUtils
1921

2022
logger = get_logger(__name__)
2123

2224

25+
def apply_restaurant_filter(func):
    """Decorate a restaurant-name getter so its result is run through the filters.

    The wrapped callable is invoked with the caller's arguments and its return
    value is handed to ``FoodExpenseParser.apply_restaurant_filters``.

    A ``None`` result (the decorated getters are annotated ``Optional[str]``)
    is passed through untouched: the filter step calls ``str.replace`` on its
    input and would raise ``AttributeError`` on ``None``.

    :param func: callable returning a restaurant name or ``None``
    :return: wrapper accepting the same arguments as ``func``
    """
    # Imported locally so the decorator does not rely on a module-level import.
    import functools

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated getter
    def wrapper(*args, **kwargs):
        restaurant = func(*args, **kwargs)
        if restaurant is None:
            # Nothing to filter; let the caller deal with the missing name.
            return None
        return FoodExpenseParser.apply_restaurant_filters(restaurant)

    return wrapper
30+
31+
2332
class FoodExpenseParser(ABC):
2433
@classmethod
2534
@abstractmethod
@@ -34,51 +43,37 @@ def serialize_expenses_to_json_file(
3443
output_path = Path(output_path)
3544
if not output_path.parent.exists():
3645
output_path.parent.mkdir(exist_ok=True)
37-
file = open(output_path, "w")
46+
3847
sorted_expenses = sorted(expenses, key=lambda exp: exp.date, reverse=True)
3948
json_expenses = json.dumps(
4049
[expense.__dict__ for expense in sorted_expenses],
4150
ensure_ascii=False,
4251
indent=4,
4352
)
44-
file.write(json_expenses)
45-
file.close()
46-
logger.success(f"Successfully written results to {output_path=}")
53+
54+
JsonUtils.write_to_json_file(json_expenses, output_path)
4755
return json_expenses
4856

4957
@classmethod
50-
def find_and_replace_string_value(
51-
cls, string_value: str, filters: dict[str, str]
52-
) -> str:
58+
def apply_restaurant_filters(cls, string_value: str) -> str:
5359
"""
5460
Apply filters by finding the filters dict key and replacing it by the dict value
5561
"""
56-
for exclude_str, replace_str in filters.items():
57-
string_value = string_value.replace(exclude_str, replace_str)
62+
filters = JsonUtils.load_dict_from_json(
63+
os.path.join(ROOT_DIR, "parsers/restaurant-filters.json")
64+
)
65+
66+
for filter_key, filter_value in filters["replace"].items():
67+
string_value = string_value.replace(filter_key, filter_value)
68+
for trim_str in filters["trim"]:
69+
string_value = string_value.replace(trim_str, "")
70+
5871
return string_value
5972

6073

6174
class BoltFoodParser(FoodExpenseParser):
6275
sender_email: Final[str] = "portugal-food@bolt.eu"
6376
keywords: Final[str] = "Delivery from Bolt Food"
64-
# TODO: Extract this list to an external file ignored by git and load it when script runs
65-
# TODO: Make this filtering generic across parsers
66-
restaurant_filters: Final[Dict[str, str]] = {
67-
" - Saldanha Avenida Casal Ribeiro, 50 B , 1000-093 To Praça Aniceto do Rosário, Lisbon 1 Hamburguer X": "",
68-
" - Saldanha Av. Miguel Bombarda, 23B": "",
69-
" Rua do saco 50, 1150-284 Lisboa To Praça Aniceto do Rosário, Lisbon 1 🎁 2x1": "",
70-
" - Av. Roma Avenida de Roma 74 B": "",
71-
"&#39;": "'",
72-
" Rua Marquês de Fronteira 117F": "",
73-
", 1070-292 Lisboa To Praça Aniceto do Rosário, Lisbon 1 × 🎁 2x1": "",
74-
" Av. Da República, 97 B": "",
75-
", 1070": "",
76-
" Praça do Chile 8 Lisboa 1000": "",
77-
" Rua da Penha de França": "",
78-
" Centro de Lazer do Campo Pequeno loja 412": "",
79-
" Av. Duque de Ávila 46B": "",
80-
" Rua do saco 50, 1150": "",
81-
}
8277

8378
@classmethod
8479
def fetch_expenses(cls) -> Iterable[FoodExpense]:
@@ -120,6 +115,7 @@ def parse_expenses_from_messages(
120115
return expenses
121116

122117
@classmethod
118+
@apply_restaurant_filter
123119
def get_restaurant(cls, message: GmailMessage) -> Optional[str]:
124120
if re.search(r"From .* -", message.subject) is not None:
125121
restaurant = re.search(r"From .* -", message.subject).group()[5:-2]
@@ -136,9 +132,7 @@ def get_restaurant(cls, message: GmailMessage) -> Optional[str]:
136132
if restaurant.__contains__("-"):
137133
restaurant = restaurant.split("-")[0].strip()
138134

139-
return FoodExpenseParser.find_and_replace_string_value(
140-
restaurant, cls.restaurant_filters
141-
)
135+
return restaurant
142136

143137
@staticmethod
144138
def get_date(message: GmailMessage) -> str:
@@ -168,26 +162,6 @@ def get_total_payed(message: GmailMessage) -> Optional[float]:
168162
class UberEatsParser(FoodExpenseParser):
169163
sender_email: Final[str] = "uber.portugal@uber.com"
170164
keywords: Final[str] = "Total"
171-
restaurant_filters: Final[Dict[str, str]] = {
172-
"&#39;": "'",
173-
"&amp;": "&",
174-
"\u00ae": "",
175-
" 🐠": "",
176-
" (Marquês de Pombal)": "",
177-
"® (Saldanha)": "",
178-
" (Saldanha)": "",
179-
" (General Roçadas)": "",
180-
" (Fontes Pereira de Melo)": "",
181-
" (São Sebastião)": "",
182-
" (Graça)": "",
183-
" (Monumental)": "",
184-
" (Saldanha Residence)": "",
185-
" (República)": "",
186-
" (Sta": "",
187-
" (Barata Salgueiro)": "",
188-
" (Rossio)": "",
189-
" - by Street Chow": "",
190-
}
191165

192166
@classmethod
193167
def fetch_expenses(cls) -> Iterable[FoodExpense]:
@@ -212,7 +186,7 @@ def parse_expenses_from_messages(
212186
try:
213187
expense = UberEatsExpense(
214188
id=message.id,
215-
restaurant=cls.get_restaurant(message, cls.restaurant_filters),
189+
restaurant=cls.get_restaurant(message),
216190
total=cls.get_total_payed(message),
217191
date=cls.get_date(message),
218192
)
@@ -227,9 +201,10 @@ def parse_expenses_from_messages(
227201
return expenses
228202

229203
@staticmethod
230-
def get_restaurant(message: GmailMessage, filters: dict[str, str]) -> Optional[str]:
204+
@apply_restaurant_filter
205+
def get_restaurant(message: GmailMessage) -> Optional[str]:
231206
restaurant = message.subject.split("receipt for ")[1].split(".")[0]
232-
return FoodExpenseParser.find_and_replace_string_value(restaurant, filters)
207+
return restaurant
233208

234209
@staticmethod
235210
def get_total_payed(message: GmailMessage) -> float:
gmail_fisher/parsers/restaurant-filters.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"trim": [
3+
"\u00ae",
4+
" 🐠",
5+
" (Marquês de Pombal)",
6+
"® (Saldanha)",
7+
" (Saldanha)",
8+
" (General Roçadas)",
9+
" (Fontes Pereira de Melo)",
10+
" (São Sebastião)",
11+
" (Graça)",
12+
" (Monumental)",
13+
" (Saldanha Residence)",
14+
" (República)",
15+
" (Sta",
16+
" (Barata Salgueiro)",
17+
" (Rossio)",
18+
" - by Street Chow",
19+
" - Saldanha Avenida Casal Ribeiro, 50 B , 1000-093 To Praça Aniceto do Rosário, Lisbon 1 Hamburguer X",
20+
" - Saldanha Av. Miguel Bombarda, 23B",
21+
" Rua do saco 50, 1150-284 Lisboa To Praça Aniceto do Rosário, Lisbon 1 🎁 2x1",
22+
" - Av. Roma Avenida de Roma 74 B",
23+
" Rua Marquês de Fronteira 117F",
24+
", 1070-292 Lisboa To Praça Aniceto do Rosário, Lisbon 1 × 🎁 2x1",
25+
" Av. Da República, 97 B",
26+
", 1070",
27+
" Praça do Chile 8 Lisboa 1000",
28+
" Rua da Penha de França",
29+
" Centro de Lazer do Campo Pequeno loja 412",
30+
" Av. Duque de Ávila 46B",
31+
" Rua do saco 50, 1150"
32+
],
33+
"replace": {
34+
"&#39;": "'",
35+
"&amp;": "&"
36+
}
37+
}

gmail_fisher/utils/json_utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import json
2+
3+
from gmail_fisher import logger
4+
5+
6+
class JsonUtils:
    """Small helpers for reading and writing JSON files."""

    @classmethod
    def load_dict_from_json(cls, filters_path):
        """Parse the JSON document stored at ``filters_path`` and return it.

        :param filters_path: path of the JSON file to read
        :return: the decoded JSON content
        :raises OSError: if the file cannot be opened
        :raises json.JSONDecodeError: if the content is not valid JSON
        """
        # Explicit UTF-8 so non-ASCII entries decode the same way regardless
        # of the platform's default locale encoding.
        with open(filters_path, encoding="utf-8") as filters:
            return json.load(filters)

    @classmethod
    def write_to_json_file(cls, json_content, json_path):
        """Write an already-serialized JSON string to ``json_path``.

        :param json_content: JSON text to write verbatim
        :param json_path: destination file path; overwritten if it exists
        :raises OSError: if the file cannot be opened or written
        """
        # The context manager closes the handle even when write() raises.
        with open(json_path, "w", encoding="utf-8") as file:
            file.write(json_content)
        logger.success(f"Successfully written results to {json_path=}")
Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,29 @@
1-
from gmail_fisher.data.models import BoltFoodExpense, UberEatsExpense, GmailMessage
2-
from gmail_fisher.parsers.food import BoltFoodParser, UberEatsParser
1+
import pytest
2+
3+
from gmail_fisher.data.models import UberEatsExpense, BoltFoodExpense
4+
from gmail_fisher.parsers.food import FoodExpenseParser, BoltFoodParser, UberEatsParser
5+
6+
7+
@pytest.mark.parametrize(
    "input_value, expected_output",
    [
        # Parenthesised location suffix.
        ("Sushi Place (General Roçadas)", "Sushi Place"),
        # Dash-separated branch and street address.
        ("Honorato - Saldanha Av. Miguel Bombarda, 23B", "Honorato"),
        # Nothing to trim: the name must come back unchanged.
        ("Pizza Lizzy", "Pizza Lizzy"),
        # Plain address suffixes.
        ("Sabores do Campo Centro de Lazer do Campo Pequeno loja 412", "Sabores do Campo"),
        ("Chimarrão Praça do Chile 8 Lisboa 1000", "Chimarrão"),
        ("Chickinho Rua Marquês de Fronteira 117F", "Chickinho"),
        ("Hello Beijing Av. Da República, 97 B", "Hello Beijing"),
        ("Udon Av. Duque de Ávila 46B", "Udon"),
    ],
)
def test_apply_restaurant_filters(input_value, expected_output):
    """Check that each raw restaurant string is reduced to the expected name."""
    assert FoodExpenseParser.apply_restaurant_filters(input_value) == expected_output
327

428

529
def test_parse_bolt_expenses_from_messages(bolt_food_messages):
@@ -36,9 +60,3 @@ def test_parse_uber_eats_expenses_from_messages(uber_eats_messages):
3660
date="2020-09-19",
3761
),
3862
]
39-
40-
41-
def test_total_payed(bolt_email_html_body):
42-
BoltFoodParser.get_total_payed(
43-
message=GmailMessage(id="id", subject="", date="", body=bolt_email_html_body)
44-
)

0 commit comments

Comments
 (0)