-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunctions.py
More file actions
executable file
·105 lines (76 loc) · 2.57 KB
/
functions.py
File metadata and controls
executable file
·105 lines (76 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# coding: UTF-8
import arxiv
import pandas as pd
import datetime
import smtplib
from email.mime.text import MIMEText
from email.utils import formatdate
def now():
    '''
    Return the current date and time as a timezone-aware datetime in JST.

    JST is built here as a fixed offset of +9 hours from UTC.
    '''
    jst = datetime.timezone(datetime.timedelta(hours=9), 'JST')
    return datetime.datetime.now(jst)
def recentDates(now, A, B):
    '''
    Return two dates counted back from ``now`` as 'YYYYMMDD' strings.

    now  : datetime to count back from
    A, B : how many days to go back for the first / second date

    Returns the tuple (date A days before now, date B days before now).
    '''
    def _stamp(days_back):
        # Shift by a negative number of days, then format with
        # zero-padded month and day.
        day = now + datetime.timedelta(days=-days_back)
        return f'{day.year}{day.month:02d}{day.day:02d}'

    return _stamp(A), _stamp(B)
def getdf(dateA_s, dateB_s, max_results):
    '''
    Query arXiv through the arxiv package and return the results as a DataFrame.

    dateA_s, dateB_s : date strings placed into the submittedDate range of
                       the query; '235959' is appended to dateB_s
    max_results      : passed through to arxiv.Search

    The query is restricted to the hep-th category and sorted by submission
    date. Returns a DataFrame with the columns link, title, publishedDate,
    authors and abstract, one row per result.
    '''
    query = arxiv.Search(
        query=f'cat:hep-th AND submittedDate:[{dateA_s} TO {dateB_s}235959]',
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )

    # Collect one list per output column.
    names = ['link', 'title', 'publishedDate', 'authors', 'abstract']
    columns = {name: [] for name in names}
    for paper in query.results():
        columns['link'].append(paper.entry_id)
        columns['title'].append(paper.title)
        columns['publishedDate'].append(paper.published)
        columns['abstract'].append(paper.summary)
        # Each author is converted to str and all of them are joined into
        # a single comma-separated string.
        columns['authors'].append(",".join(str(author) for author in paper.authors))

    # Build the frame column by column.
    df = pd.DataFrame(columns=names)
    for name in names:
        df[name] = columns[name]
    return df
def search(df, kwds):
    '''
    Keep the rows of ``df`` whose abstract matches at least one keyword.

    df   : DataFrame with an 'abstract' column (any other columns are
           carried through unchanged)
    kwds : sequence of keywords; each one is passed to Series.str.contains
           with case=False, so matching ignores case and, as is the pandas
           default, the keyword is interpreted as a regular expression

    Returns a DataFrame holding the matching rows, without the 'abstract'
    column, plus one boolean column 'Key_<keyword>' per keyword telling
    which keywords matched. An empty ``df`` (e.g. no new submissions on
    weekends) or an empty ``kwds`` gives a result with zero rows.
    Rows whose abstract is missing never count as a match.
    '''
    has_rows = len(df) != 0

    key_flags = pd.DataFrame(index=df.index)
    # Running OR over all keyword columns; kept as a real boolean Series so
    # that it is a valid row mask even when it is empty.
    any_key = pd.Series(False, index=df.index)

    for kw in kwds:
        if has_rows:
            # na=False: a missing abstract is "no match" instead of NaN,
            # which would otherwise be truthy.
            hit = df['abstract'].str.contains(kw, case=False, na=False)
        else:
            # Nothing to scan; avoid the .str accessor on an empty column
            # whose dtype may not be string-like.
            hit = pd.Series(False, index=df.index)
        key_flags[f'Key_{kw}'] = hit
        any_key = any_key | hit

    combined = pd.concat([df, key_flags], axis=1)
    hits = combined[any_key]  # filter
    return hits.drop(columns=['abstract'])