-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathreadxls.py
More file actions
31 lines (27 loc) · 918 Bytes
/
readxls.py
File metadata and controls
31 lines (27 loc) · 918 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
from setenvrion import xls_dir
import difflib,os,random,shutil
def ele_similar(str1, str2, *, threshold=0.9):
    """Return whether two strings are at least ``threshold`` similar.

    Similarity is ``difflib.SequenceMatcher(None, str1, str2).ratio()``,
    a float in ``[0, 1]`` where 1.0 means the sequences are identical.

    Args:
        str1: First string to compare.
        str2: Second string to compare.
        threshold: Minimum ratio (inclusive) for the pair to be considered
            a match. Defaults to 0.9, the value previously hard-coded here.

    Returns:
        ``True`` if the similarity ratio is ``>= threshold``, else ``False``.
    """
    ratio = difflib.SequenceMatcher(None, str1, str2).ratio()
    return ratio >= threshold
def similarity(finalpdfs):
    """Return the share of accepted titles that fuzzily match ``finalpdfs``.

    Accepted titles are read from ``WASAPaperList.xls`` under ``xls_dir``
    (only the column selected by ``usecols=[1]``), taking rows 2 through 115
    of the loaded data and removing every ``':'`` from each title.
    Each accepted title is compared against every distinct entry of
    ``finalpdfs`` with ``ele_similar``.

    Args:
        finalpdfs: Iterable of hashable candidate title strings; duplicates
            are dropped before comparison.

    Returns:
        ``matched / total`` rounded to 4 decimal places, where ``matched`` is
        the number of accepted entries with at least one similar candidate
        and ``total`` is the number of accepted entries read (114).

    Raises:
        IndexError: If the loaded sheet has fewer than 116 data rows.
        AttributeError: If one of the selected cells is not a string
            (it has no ``replace`` method).
    """
    candidates = list(set(finalpdfs))
    df = pd.read_excel(f'{xls_dir}/WASAPaperList.xls', usecols=[1])

    # The row window is tied to the layout of this particular spreadsheet.
    # NOTE(review): colons are presumably stripped so titles line up with
    # file names, which cannot contain ':' -- confirm against the caller.
    acceptpdfs = [df.values[i, 0].replace(':', '') for i in range(2, 116)]

    commonpdfs = []
    for accepted in acceptpdfs:
        matched = False
        for candidate in candidates:
            if ele_similar(accepted, candidate):
                # Diagnostic output: one marker plus the matched pair,
                # emitted for every matching candidate.
                print(1)
                print(accepted)
                print(candidate)
                matched = True
        # Count an accepted entry once even if several candidates match it;
        # appending per match would let the returned fraction exceed 1.0.
        if matched:
            commonpdfs.append(accepted)
    return round(float(len(commonpdfs) / len(acceptpdfs)), 4)