diff --git a/articleDateExtractor/__init__.py b/articleDateExtractor/__init__.py index 8753e4b..57f7af7 100644 --- a/articleDateExtractor/__init__.py +++ b/articleDateExtractor/__init__.py @@ -1,6 +1,6 @@ __author__ = 'Ran Geva' -import urllib2,re, json +import urllib.request,re, json from dateutil.parser import parse try: from bs4 import BeautifulSoup @@ -38,16 +38,16 @@ def _extractFromLDJson(parsedHTML): try: jsonDate = parseStrDate(data['datePublished']) - except Exception, e: + except Exception as e: pass try: jsonDate = parseStrDate(data['dateCreated']) - except Exception, e: + except Exception as e: pass - except Exception, e: + except Exception as e: return None @@ -205,7 +205,7 @@ def _extractFromHTMLTag(parsedHTML): def extractArticlePublishedDate(articleLink, html = None): - print "Extracting date from " + articleLink + print("Extracting date from " + articleLink) articleDate = None @@ -213,10 +213,10 @@ def extractArticlePublishedDate(articleLink, html = None): articleDate = _extractFromURL(articleLink) if html is None: - request = urllib2.Request(articleLink) + request = urllib.request.Request(articleLink) # Using a browser user agent, decreases the change of sites blocking this request - just a suggestion # request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36') - html = urllib2.build_opener().open(request).read() + html = urllib.request.build_opener().open(request).read() parsedHTML = BeautifulSoup(html,"lxml") @@ -230,8 +230,8 @@ def extractArticlePublishedDate(articleLink, html = None): articleDate = possibleDate except Exception as e: - print "Exception in extractArticlePublishedDate for " + articleLink - print e.message, e.args + print("Exception in extractArticlePublishedDate for " + articleLink) + print(str(e), e.args) @@ -242,4 +242,4 @@ def extractArticlePublishedDate(articleLink, html = None): if __name__ == '__main__': d = 
extractArticlePublishedDate("http://techcrunch.com/2015/11/30/atlassian-share-price/") - print d + print(d) diff --git a/setup.py b/setup.py index 4c7af77..1674795 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ description='Automatically extracts and normalizes an online article or blog post publication date', long_description=readme, install_requires=[ - "BeautifulSoup >= 3.2.1", + "BeautifulSoup4 >= 4.5.1", "python-dateutil >= 2.4.2" ], classifiers=( @@ -28,7 +28,6 @@ 'Natural Language :: English', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7' + 'Programming Language :: Python :: 3.0', ) -) \ No newline at end of file +)