Skip to content

Commit 11bf267

Browse files
fetching now added
1 parent b4600f4 commit 11bf267

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ for response,version_time in WaybackMachine(url, start = "2020-05-01", end = "20
6969

7070
*String representation of timedelta will be added.*
7171

72+
### Current version
73+
74+
If `start` is `None`, the lookup will start by fetching the current page (live, not from the archive).
75+
76+
To avoid this behavior, manually set `start = datetime.datetime.now()`.
77+
7278

7379

7480
### Configurations

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
setuptools.setup(
1414
name = 'waybackmachine',
15-
version = '0.1.7',
15+
version = '0.2.0',
1616
author = 'Martin Beneš',
1717
author_email = 'martinbenes1996@gmail.com',
1818
description = 'Envelope for archive.org API.',
@@ -21,7 +21,7 @@
2121
packages=setuptools.find_packages(),
2222
license='MPL',
2323
url = 'https://github.com/martinbenes1996/waybackmachine',
24-
download_url = 'https://github.com/martinbenes1996/waybackmachine/archive/0.1.7.tar.gz',
24+
download_url = 'https://github.com/martinbenes1996/waybackmachine/archive/0.2.0.tar.gz',
2525
keywords = ['waybackmachine', 'archive', 'web', 'html', 'webscraping'],
2626
install_requires = reqs,
2727
package_dir={'': '.'},

waybackmachine/fetch.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class WaybackMachine:
2222
def __init__(self, url, start = None, end = None, step = None, config = 'default'):
2323
self._log = logging.getLogger(self.__class__.__name__)
2424
self._url = url
25+
# current version too
26+
self._current = start is None
2527
# parse config
2628
try:
2729
self._start, self._end, self._step = [i() for i in self._config[config]]
@@ -41,13 +43,23 @@ def end(self):
4143
return self._end
4244
def step(self):
4345
return self._step
44-
46+
47+
def _current_version(self):
    """Fetch the live page at ``self._url`` rather than an archived snapshot.

    Returns:
        tuple: ``(response, timestamp)`` where ``response`` is the result of
        ``requests.get(self._url)`` and ``timestamp`` is ``datetime.now()``
        taken right after the request returned.

    Raises:
        WaybackMachineError: if the request raises an ordinary exception.
            The original exception is chained as ``__cause__``.
    """
    self._log.info("searching version now")
    # fetch
    try:
        response = requests.get(self._url)
    except Exception as err:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must propagate instead of being reported as a connection failure.
        raise WaybackMachineError("failed connecting to archive") from err
    return response, datetime.now()
4556
def __iter__(self):
57+
if self._current: yield self._current_version()
4658
# yield date sequence from archive
4759
versions = set()
4860
while not self._now or self._now > self._end:
4961
now = self._now.strftime('%Y-%m-%d %H:%M:%S') if self._now else "now"
50-
self._log.info(f"searching in time {now}")
62+
self._log.info(f"searching archive {now}")
5163
# get older version
5264
archive_url = self._construct_archive_url(self._now)
5365
response,version_time = self._fetch_archive(archive_url)

0 commit comments

Comments
 (0)