forked from hochschule-darmstadt/openartbrowser-social
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.py
More file actions
152 lines (120 loc) · 5.19 KB
/
index.py
File metadata and controls
152 lines (120 loc) · 5.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import json
import tweepy
import requests
from config import *
from constant import *
from constant import defaultSize
import re
from resize_img import *
from PIL import Image
# Build the authenticated Twitter client used for posting.
# The four credential names are not defined in the visible part of this file;
# presumably they are provided by the star import from `config` — TODO confirm.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Module-level API handle; TwitterBot uses it to upload the image and tweet.
api = tweepy.API(auth)
class TwitterBot:
    """Post one not-yet-posted artwork from a list of artwork QIDs to Twitter.

    Creating an instance runs the whole workflow: pick the first QID that is
    not recorded in the persistent "posted" file, look up its title, image and
    first artist, download (and, if necessary, shrink) the image, tweet it and
    finally record the QID as posted.

    The class depends on names defined outside of it at module level:
    ``api`` (Twitter client), ``body`` (search response holding the artworks),
    ``url`` and ``headers`` (search endpoint), ``image_limit``,
    ``defaultSize`` and ``limit_img_size``.
    """

    # File that persists the QIDs of all artworks which were already handled.
    POSTED_FILE = "posted.txt"
    # Local file the artwork image is downloaded to before it is uploaded.
    # The same path is used for download, size check, resize and upload.
    IMAGE_PATH = "image.jpg"
    # Translation table deleting the punctuation that sanitize() strips.
    _REMOVE_CHARS = str.maketrans("", "", "-—();&?/,\".")

    def __init__(self, artworkQIDs):
        """Run the posting workflow for the given list of artwork QIDs.

        Args:
            artworkQIDs: List of artwork QIDs. Each position is used as the
                index of the matching entry in ``body["hits"]["hits"]``, so
                the list must be in the same order as the search hits.
        """
        self.artworkQIDs = artworkQIDs
        # Keep trying candidates until one tweet went out or nothing is left.
        # (Artworks whose image is too large are marked as posted and skipped.)
        while True:
            postQID = self.checkPostedArtworks(self.artworkQIDs)
            if postQID is None:
                print("No unposted artwork left in the provided list.")
                return
            if self._post_artwork(postQID):
                return

    def _post_artwork(self, postQID):
        """Try to tweet the artwork *postQID*; return True if it was posted.

        Returns False when the downloaded image exceeds ``image_limit``; in
        that case the QID is still recorded as posted so it is skipped on the
        next attempt.
        """
        # Local import: `os` is not imported explicitly at the top of the file.
        import os

        # DATA FOR TWITTER POST
        postQIDPos = self.artworkQIDs.index(postQID)
        print("artworkQIDs: ", self.artworkQIDs)
        print("randorQID:", postQID)
        print("----------------------------------------------------------------------")
        source = body["hits"]["hits"][postQIDPos]["_source"]
        # Title of the artwork
        artwork_titel = source["label"]
        print("artwork_titel: ", artwork_titel)
        # Link to the image
        image_link = source["image"]
        print("Image_Link: ", image_link)
        # TODO: all artists should be extracted, not only the first one
        # QID of the artist
        artistQID = source["artists"][0]
        print("artistQID", artistQID)
        artist_query = {
            'size': '1',
            'query': {
                "match": {
                    "id": artistQID
                }
            }
        }
        response_artist = requests.get(url, data=json.dumps(artist_query), headers=headers)
        body_artist = response_artist.json()
        # Name of artist
        postArtistName = body_artist["hits"]["hits"][0]["_source"]["label"]
        print("artist_name:", postArtistName)

        self.image_download(image_link)
        imgpath = self.IMAGE_PATH
        imgSizeByte = os.path.getsize(imgpath)
        print("imgSizeByte ist: ", imgSizeByte)
        if imgSizeByte > image_limit:
            # Image is too large to be used: remember the artwork as handled
            # so the caller moves on to the next candidate.
            self.addPostedArtwork(postQID)
            return False
        if imgSizeByte > defaultSize:
            # Shrink the file in place before uploading it.
            limit_img_size(
                imgpath,  # input file
                imgpath,  # target file
                3000000,  # bytes
                tolerance=5  # percent of what the file may be bigger than target_filesize
            )
            imgSizeBytenew = os.path.getsize(imgpath)
            print("new imagesize: ", imgSizeBytenew)
        url_post = f"https://openartbrowser.org/en/artwork/{postQID}"
        tweet_text = f" \"{postArtistName}\" is the Artist von \"{self.sanitize(artwork_titel)}\". To see more infos see {url_post}"
        print(tweet_text)
        # NOTE(review): newer Tweepy releases appear to have renamed this call;
        # confirm against the installed Tweepy version.
        api.update_with_media(imgpath, tweet_text)
        # Only reached if the Twitter post did not raise.
        self.addPostedArtwork(postQID)
        return True

    def checkPostedArtworks(self, artworkQIDs):
        """Return the first QID of *artworkQIDs* that has not been posted yet.

        All posted artwork QIDs are stored one per line in ``POSTED_FILE``.
        QIDs are compared as whole lines, so "Q12" is not mistaken for an
        already posted "Q123". A missing file means nothing was posted yet.

        Returns:
            The first unposted QID (top-down), or None if all were posted.
        """
        try:
            with open(self.POSTED_FILE, "r") as file:
                posted = {line.strip() for line in file}
        except FileNotFoundError:
            posted = set()
        for qid in artworkQIDs:
            if qid not in posted:
                return qid
        return None

    def image_download(self, link):
        """Download the image at *link* and store it at ``IMAGE_PATH``.

        Raises:
            requests.HTTPError: if the server answers with an error status,
                so that an error page is never saved as the image.
        """
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        with open(self.IMAGE_PATH, "wb") as file:
            file.write(response.content)

    def sanitize(self, text: str) -> str:
        """
        Return *text* cleaned up for use inside a tweet.

        The characters ``- — ( ) ; & ? / , " .`` are removed, line breaks are
        replaced by a space, carriage returns are dropped, runs of spaces are
        collapsed to a single space and leading/trailing spaces are stripped.
        This function should be used for fields which contain longer strings
        like 'label' or 'term'.
        """
        text = text.translate(self._REMOVE_CHARS)
        # remove newline and carriage return
        sanitized_text = text.replace('\n', ' ').replace('\r', '')
        # remove duplicated spaces and remove leading and trailing spaces
        sanitized_text = re.sub(' +', ' ', sanitized_text)
        return sanitized_text.strip()

    def addPostedArtwork(self, artworkQID):
        """Append *artworkQID* to ``POSTED_FILE`` so it is not posted again."""
        with open(self.POSTED_FILE, "a") as file:
            file.write("\n" + artworkQID)
        print("Added artwork QID " + artworkQID + " to persistent file.")
# Query the search endpoint at `url` for the candidate artworks.
response = requests.get(url, data=json.dumps(artwork_query), headers=headers)
# Fail with a clear HTTP error instead of a KeyError on an error payload.
response.raise_for_status()
# `body` is read by TwitterBot as a module-level global.
body = response.json()
# Collect the QIDs of at most `count` returned artworks, in hit order.
# Slicing (instead of indexing range(count)) tolerates fewer hits than requested.
artworkQIDs = [hit["_source"]["id"] for hit in body["hits"]["hits"][:max(count, 0)]]
TwitterBot(artworkQIDs)