Skip to content

Commit 2d748e9

Browse files
Merge pull request #2 from AlbertoCuadra/develop
Solve: error hyphens
2 parents 61936a3 + f6a8e0d commit 2d748e9

File tree

1 file changed

+23
-7
lines changed

1 file changed

+23
-7
lines changed

doi_scraper.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# PhD Candidate - Group Fluid Mechanics
1212
# Universidad Carlos III de Madrid
1313
#
14-
# Last update May 12 2023
14+
# Last update May 13 2023
1515

1616
import re
1717
import requests
@@ -23,6 +23,13 @@
2323
INDENT_POST = 16 # Number of spaces after the field name
2424

2525

26+
# Function that prepares a given title for comparison
27+
def prepare_title(title):
    """Normalize a title so two titles can be compared as plain strings.

    The title is lower-cased, Unicode hyphen/dash variants are replaced by
    the ASCII hyphen-minus, and any run of consecutive hyphens is collapsed
    to a single one. This makes 'gas--liquid', 'gas---liquid',
    'gas\u2013liquid' and 'gas-liquid' all compare equal.

    Args:
        title: Title string to normalize.

    Returns:
        The normalized title string.
    """
    title = title.lower()
    # U+2010..U+2015 covers hyphen, non-breaking hyphen, figure dash,
    # en dash, em dash and horizontal bar; U+2212 is the minus sign.
    title = re.sub(r'[\u2010-\u2015\u2212]', '-', title)
    # Collapse whole runs, not just pairs, so '---' also becomes '-'.
    title = re.sub(r'-{2,}', '-', title)
    return title
32+
2633
# Function to get DOI based on article title
2734
def get_doi(title):
2835
# Set request
@@ -37,17 +44,26 @@ def get_doi(title):
3744
# Sort items by record creation date ('created' field, newest first)
3845
items = sorted(data['message']['items'], key=lambda x: x.get('created', {}).get('date-time'), reverse=True)
3946

47+
# Prepare title for comparison
48+
title_lower = prepare_title(title)
49+
50+
# Search for DOI
4051
for item in items:
4152
item_title = item.get('title', [''])[0]
42-
doi = item['DOI']
43-
if not doi.endswith('.vid') and title.lower() in item_title.lower():
44-
return doi
53+
54+
# Prepare title for comparison
55+
item_title_lower = prepare_title(item_title)
56+
57+
# print('Comparing:\n', title_lower, '\n', item_title_lower, '\n') # (debug)
58+
59+
# Compare titles
60+
if title_lower in item_title_lower:
61+
doi = item['DOI']
62+
if not doi.endswith('.vid'):
63+
return doi
4564

4665
return ''
4766

48-
49-
50-
5167
def process_bib_line(line, current_item):
5268
if line.startswith('@'):
5369
if current_item:

0 commit comments

Comments
 (0)