Skip to content

Commit 55ae4d8

Browse files
authored
Code refactoring : use Twitter instead of Facebook (#17)
Fetch horoscope from Twitter using https://github.com/twintproject/twint
1 parent f873bd3 commit 55ae4d8

File tree

11 files changed

+1059
-171
lines changed

11 files changed

+1059
-171
lines changed

Dockerfile

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
1-
FROM python:3.6.9
1+
FROM python:3.9.6-buster
22

33
WORKDIR /root/Horoscope
44

5-
RUN pip3 install --upgrade pip
5+
# INSTALL PIPENV
6+
RUN pip3 install pipenv
67

7-
ADD requirements.txt requirements.txt
8+
COPY Pipfile Pipfile
9+
COPY Pipfile.lock Pipfile.lock
10+
RUN pipenv install --dev --system --deploy
11+
#--dev — Install both develop and default packages from Pipfile.
12+
#--system — Use the system pip command rather than the one from your virtualenv.
13+
#--deploy — Make sure the packages are properly locked in Pipfile.lock, and abort if the lock file is out-of-date.
814

9-
RUN pip3 install -r requirements.txt
10-
11-
RUN apt-get update && apt-get install -y tesseract-ocr tesseract-ocr-fra
15+
RUN apt-get update &&\
16+
apt-get install -y tesseract-ocr tesseract-ocr-fra
1217

1318
CMD python3 -u horoscope_bot.py

Dockerfile.lock

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
FROM python:3.9.6-buster
2+
3+
WORKDIR /root/Horoscope
4+
5+
# INSTALL PIPENV
6+
RUN pip3 install pipenv

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
build:
2+
docker build -t horoscope .
3+
run:
4+
docker run -d -v $(PWD):/root/Horoscope --name horoscope horoscope
5+
build_lock:
6+
docker build -t lock -f Dockerfile.lock .
7+
lock:
8+
docker run -v $(PWD):/root/Horoscope --rm lock pipenv lock

Pipfile

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
[[source]]
2+
url = "https://pypi.org/simple"
3+
verify_ssl = true
4+
name = "pypi"
5+
6+
[packages]
7+
discord = "*"
8+
asyncio = "*"
9+
aiohttp = "*"
10+
pytz = "*"
11+
numpy = "*"
12+
pandas = "*"
13+
tqdm = "*"
14+
pytesseract = "*"
15+
scikit-learn = "*"
16+
twint = {editable = true, ref = "origin/master", git = "https://github.com/twintproject/twint.git"}
17+
requests = "*"
18+
matplotlib = "*"
19+
Pillow = "*"
20+
nest-asyncio = "*"
21+
22+
[dev-packages]
23+
24+
[requires]
25+
python_version = "3.9"

Pipfile.lock

Lines changed: 881 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

build.sh

Lines changed: 0 additions & 1 deletion
This file was deleted.

horoscope_bot.py

Lines changed: 109 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,25 @@
11
# coding: utf8
22
import discord
3-
from discord.ext import commands
43
import asyncio
5-
import aiohttp
6-
import datetime as dt
7-
import hashlib
4+
import re
85
import os
96
import pytz
7+
import pickle
8+
import datetime as dt
9+
import numpy as np
10+
11+
from PIL import Image
12+
from collections import Counter
13+
from typing import Optional, List
1014

1115
from my_constants import TOKEN, IMG_FOLDER, channel_horoscope
12-
from scraper import get_last_image, download_image
16+
from scraper import get_last_images, download_image
1317
from parse import parse_horoscope, reformat_horoscope
1418
from utils import convert_timedelta, md5
1519

16-
import pickle
17-
from PIL import Image
18-
from collections import Counter
19-
import numpy as np
20+
import nest_asyncio
21+
nest_asyncio.apply()
22+
2023

2124
manual = """
2225
```Help
@@ -27,19 +30,24 @@
2730
"""
2831

2932
tz_paris = pytz.timezone("Europe/Paris")
33+
TIMESTAMP_FORMAT = "%Y-%m-%d"
34+
USERNAME = "RTL2officiel"
3035

3136
# left, top, right, bottom
32-
true_width, true_height = 1181, 1716
37+
true_width, true_height = 2362, 3431
3338
# True Horoscope has the following
3439
# color proportions
35-
#Counter({0: 134576, 1: 132231, 2: 28443})
36-
# Tested on a header of size 1181*250
37-
crop_height = 250
38-
true_prop = np.array([134576, 132231, 28443])/(true_width * crop_height)
39-
rtl2_header = np.array([0, 0, 1181, 250])
40+
true_occurences = Counter({1: 960179, 0: 750054, 2: 179367})
41+
# Tested on a header of size true_width * crop_height
42+
crop_height = 800
43+
true_proportions = np.array([true_occurences[0], true_occurences[1], true_occurences[2]])/(true_width * crop_height)
44+
rtl2_header = np.array([0, 0, true_width, crop_height])
4045

4146
kmeans = pickle.load(open("horoscope_kmeans.pickle", "rb"))
4247

48+
def now():
49+
return dt.datetime.now().astimezone(tz_paris)
50+
4351
def is_horoscope(filename, verbose=False):
4452
"""Check if it is a horoscope or not
4553
Step 1 : check the picture size
@@ -64,18 +72,18 @@ def is_horoscope(filename, verbose=False):
6472

6573
# Step 2
6674
k = width/true_width
67-
pixels = np.array(photo.crop(tuple(k*rtl2_header)).getdata())
75+
pixels = np.array(photo.crop(tuple(k*rtl2_header))).reshape(-1, 3)
6876
occurences = Counter(kmeans.predict(pixels))
69-
proportions = np.array([occ for occ in occurences.values()])/(k*true_width * k*crop_height)
77+
proportions = np.array([occurences[0], occurences[1], occurences[2]])/(k*true_width * k*crop_height)
7078
if verbose:
7179
print(proportions, "Image proportions")
72-
print(true_prop, "True proportions")
73-
print(np.sum(np.abs(true_prop - proportions)), "Distance")
74-
return np.sum(np.abs(true_prop - proportions)) < 0.03
80+
print(true_proportions, "True proportions")
81+
print(np.sum(np.abs(true_proportions - proportions)), "Distance")
82+
return np.sum(np.abs(true_proportions - proportions)) < 0.03
7583

7684

7785

78-
class MyClient(discord.Client):
86+
class HoroscopeDiscordBot(discord.Client):
7987
def __init__(self, *args, **kwargs):
8088
super().__init__(*args, **kwargs)
8189

@@ -88,13 +96,28 @@ async def on_ready(self):
8896
print(f"Création du dossier {IMG_FOLDER}")
8997
os.mkdir(IMG_FOLDER)
9098

91-
print("[" + dt.datetime.now().astimezone(tz_paris).ctime() + "] - Bot ready :-)")
99+
print(f"[{now().ctime()}] - Bot ready :-)")
92100
print('Logged in as')
93101
print(self.user.name)
94102
print(self.user.id)
95103
print('------')
96104

97-
async def job(self, fetch_interval=300, days=[0,1,2,3,4], hours=[10,11,12,13]):
105+
def is_for_bot(self, message) -> bool:
106+
"""Check if the message is for the bot.
107+
Args:
108+
message: Discord message
109+
"""
110+
return re.match(f"^<@!?{self.user.id}>", message.content)
111+
112+
def command(self, message, cmd: str) -> str:
113+
"""Check if the command match the one found in message.
114+
Args:
115+
message: Discord message
116+
cmd: command people sent
117+
"""
118+
return re.match(f"^<@!?{self.user.id}> {cmd}", message.content)
119+
120+
async def job(self, fetch_interval=300, days=[0,1,2,3,4], hours=[9,10,11,12]):
98121
""" Job to run every `fetch_interval` seconds,
99122
each day in days, between hours
100123
Args:
@@ -115,18 +138,20 @@ async def job(self, fetch_interval=300, days=[0,1,2,3,4], hours=[10,11,12,13]):
115138
assert min(hours) >= 0, "Need number between 0 and 23"
116139

117140
while not self.is_closed():
118-
today = dt.datetime.today().astimezone(tz_paris)
119-
while today.weekday() in days and today.hour in hours and not await self.fetch_new_horoscope():
141+
today = now()
142+
143+
while today.weekday() in days and today.hour in hours \
144+
and not await self.fetch_new_horoscope():
120145
# while (it's time to fetch horoscope) AND (the horoscope has not been published yet)
121-
# wait fetch_interval to not spam Facebook
146+
# wait fetch_interval to not spam Twitter
122147
await asyncio.sleep(fetch_interval)
148+
123149
time_to_wait = self.get_time_to_wait(hours).total_seconds()
124-
print("[" + dt.datetime.now().astimezone(tz_paris).ctime() + "] - " + f"Reprise de l'activité dans {time_to_wait} secondes.")
125-
await asyncio.sleep(time_to_wait)
150+
time_to_wait_message = f"[{now().ctime()}] - " +\
151+
f"Reprise de l'activité dans {time_to_wait} secondes."
152+
print(time_to_wait_message)
126153

127-
def command(self, cmd):
128-
"""Wrapper for bot prefix"""
129-
return f"<@{self.user.id}> " + str(cmd)
154+
await asyncio.sleep(time_to_wait)
130155

131156
async def on_message(self, message):
132157
"""Handle messages
@@ -135,20 +160,19 @@ async def on_message(self, message):
135160
if message.author == client.user:
136161
return
137162

138-
if message.content == self.command("help"):
163+
if self.command(message, "help"):
139164
await self.get_channel(channel_horoscope).send(manual.format(id=self.user.id))
140165

141-
if message.content == self.command("test"):
142-
await self.fetch_new_horoscope(force=True)
143-
144-
if message.content.startswith(self.command("download")):
166+
if self.command(message, "download"):
145167
img_href = message.content.split(" ")[-1]
146168
if img_href.startswith("http") and await self.fetch_new_horoscope(img_href=img_href):
147169
time_to_wait = self.get_time_to_wait([10,11,12]).total_seconds()
148-
print("[" + dt.datetime.now().astimezone(tz_paris).ctime() + "] - " + f"Reprise de l'activité dans {time_to_wait} secondes.")
170+
time_to_wait_message = f"[{now().ctime()}] - " +\
171+
f"Reprise de l'activité dans {time_to_wait} secondes."
172+
print(time_to_wait_message)
149173
await asyncio.sleep(time_to_wait)
150174

151-
if message.content == self.command("last"):
175+
if self.command(message, "last"):
152176
files = sorted(os.listdir(IMG_FOLDER), reverse=True)
153177
if len(files) == 0:
154178
await self.get_channel(channel_horoscope).send("Aucun horoscope en stock :-(")
@@ -157,69 +181,82 @@ async def on_message(self, message):
157181
await self.parse_and_send_horoscope(horoscope_img)
158182

159183
async def parse_and_send_horoscope(self, filename):
160-
"""Parse the image and send the image and the text
161-
found through OCR
162-
"""
184+
"""Parse the image and send the image and the text found through OCR"""
163185
print("OCR : en cours.")
164186
horoscope_dict = parse_horoscope(filename, threads=1)
165187
horoscope_str = reformat_horoscope(horoscope_dict)
166188
print("OCR : terminé.")
167189
await self.get_channel(channel_horoscope).send(file=discord.File(filename))
168190
await self.get_channel(channel_horoscope).send(horoscope_str)
169191

170-
async def fetch_new_horoscope(self, img_href=None, force=False):
171-
"""Get last image from RTL2 Facebook page,
172-
check if it's a new horoscope (using md5)
192+
async def fetch_new_horoscope(self, img_href: Optional[str] = None):
193+
"""Get last image from RTL2 Twitter page, check if it's a new horoscope (using md5)
173194
and send the file on Discord
174195
Args:
175-
img_href (str) : if not None, download the image
176-
from <img_href> url
177-
force (bool) : if True, download the last image
178-
and send it as it is
196+
img_href : if not None, download the image from <img_href> url
179197
"""
180198

181-
print("[" + dt.datetime.now().astimezone(tz_paris).ctime() + "] - Fetch Horoscope")
182-
if not img_href:
183-
print("Récupération du dernier lien.")
184-
img_href = await get_last_image()
185-
print("Téléchargement de l'image...")
186-
187-
if force:
188-
filename = await download_image(img_href, filename=IMG_FOLDER + "/" + "9999-99-99_test.jpg")
189-
await self.get_channel(channel_horoscope).send(file=discord.File(filename))
190-
return True
199+
print(f"[{now().ctime()}] - Fetch Horoscope")
200+
if img_href:
201+
print(f"[{now().ctime()}] - Lien fourni par l'utilisateur : {img_href}.")
202+
img_hrefs = [img_href]
203+
else:
204+
print(f"[{now().ctime()}] - Récupération des dernières images depuis Twitter.")
205+
today = now().strftime("%Y-%m-%d")
206+
img_hrefs = get_last_images(username=USERNAME, since=today)
191207

192-
filename = await download_image(img_href)
193208
files = sorted(os.listdir(IMG_FOLDER + "/"), reverse=True)
194209

195-
f1 = IMG_FOLDER + "/" + files[0]
196-
f2 = ""
197-
198-
if len(files) >= 1:
199-
f2 = IMG_FOLDER + "/" + files[1]
210+
if len(img_hrefs) > 0:
211+
print("Téléchargement des images...")
212+
else:
213+
print("Pas d'images tweetées aujourd'hui !")
214+
215+
for img_href in img_hrefs:
216+
filename = await download_image(img_href)
217+
218+
new_image = IMG_FOLDER + "/" + files[0]
219+
220+
if len(files) >= 1:
221+
old_image = IMG_FOLDER + "/" + files[1]
222+
else:
223+
old_image = ""
224+
225+
print(f"Test de l'image {img_href}")
226+
if is_horoscope(new_image, verbose=True):
227+
print("C'est un horoscope !")
228+
if md5(new_image) == md5(old_image):
229+
print("C'est l'horoscope d'hier")
230+
# Stop searching
231+
return False
232+
else:
233+
print("C'est l'horoscope du jour")
234+
await self.parse_and_send_horoscope(new_image)
235+
# Stop searching
236+
return True
237+
else:
238+
print("Ce n'est pas un nouveau horoscope")
239+
# Continue searching
200240

201-
print("Test de l'image : est-ce l'horoscope ?")
202-
if is_horoscope(f1) and (md5(f1) != md5(f2)):
203-
print("C'est l'horoscope !")
204-
await self.parse_and_send_horoscope(f1)
205-
return True
206-
print("Ce n'est pas l'horoscope")
207241
return False
208242

209243
def get_time_to_wait(self, hours):
210244
"""How much time to wait before checking
211245
for a new horoscope ?
212246
"""
213247

214-
today = dt.datetime.today().astimezone(tz_paris)
248+
today = now()
215249
# Wait until tomorrow
216250
days_to_wait = 1
217251
if today.weekday() == 4:
218252
# it's Friday -> wait two more days
219253
days_to_wait += 2
254+
if today.weekday() == 5:
255+
# it's Saturday -> wait one more day
256+
days_to_wait += 1
220257
next_day = today.replace(hour=hours[0],minute=0,second=0,microsecond=0) + dt.timedelta(days=days_to_wait)
221258
return next_day-today
222259

223260
if __name__ == "__main__":
224-
client = MyClient()
261+
client = HoroscopeDiscordBot()
225262
client.run(TOKEN)

horoscope_kmeans.pickle

6.86 MB
Binary file not shown.

0 commit comments

Comments
 (0)