12 changes: 9 additions & 3 deletions .github/workflows/main.yml
@@ -11,8 +11,14 @@ jobs:

     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.5, 3.6, 3.7, 3.8, 3.9]
+        include:
+          - python-version: "3.9"
+          - python-version: "3.10"
+          - python-version: "3.11"
+          - python-version: "3.12"
+          - python-version: "3.13"

     steps:
     - uses: actions/checkout@v2
@@ -21,7 +27,7 @@ jobs:
         sudo apt-get install libdb-dev

     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Cache pip
@@ -38,4 +44,4 @@ jobs:
         pip install -r tests/requirements-test.txt
     - name: Test with pytest
       run: |
-        pytest
+        pytest

12 changes: 10 additions & 2 deletions scrapy_deltafetch/middleware.py
@@ -5,7 +5,7 @@

 from scrapy.http import Request
 from scrapy.item import Item
-from scrapy.utils.request import request_fingerprint
+from scrapy.utils.request import RequestFingerprinter
 from scrapy.utils.project import data_path
 from scrapy.utils.python import to_bytes
 from scrapy.exceptions import NotConfigured
@@ -41,6 +41,14 @@ def from_crawler(cls, crawler):
         o = cls(dir, reset, crawler.stats)
         crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
         crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
+
+        try:
+            o.fingerprint = crawler.request_fingerprinter.fingerprint
+        except AttributeError:
+            from scrapy.utils.request import request_fingerprint
+
+            o.fingerprint = request_fingerprint
+
         return o

     def spider_opened(self, spider):
@@ -79,7 +87,7 @@ def process_spider_output(self, response, result, spider):
             yield r

     def _get_key(self, request):
-        key = request.meta.get('deltafetch_key') or request_fingerprint(request)
+        key = request.meta.get('deltafetch_key') or self.fingerprint(request)
         return to_bytes(key)

     def _is_enabled_for_request(self, request):

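For context on the middleware change: Scrapy 2.7 deprecated scrapy.utils.request.request_fingerprint() in favour of a per-crawler fingerprinter component exposed as crawler.request_fingerprinter, whose fingerprint() method returns bytes rather than a hexadecimal string. The try/except added to from_crawler() resolves the right callable once at setup time and stores it on the middleware. A minimal standalone sketch of the same fallback pattern (resolve_fingerprint is an illustrative name, not part of this PR):

def resolve_fingerprint(crawler):
    """Return a callable that maps a Request to its fingerprint."""
    try:
        # Scrapy >= 2.7: fingerprinter component attached to the crawler,
        # whose fingerprint() returns bytes.
        return crawler.request_fingerprinter.fingerprint
    except AttributeError:
        # Older Scrapy: fall back to the legacy helper, which returns a
        # hexadecimal string.
        from scrapy.utils.request import request_fingerprint
        return request_fingerprint

Because the two implementations return different types (bytes vs. str), _get_key() keeps wrapping the result in to_bytes() so the database keys stay bytes either way.
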
11 changes: 6 additions & 5 deletions setup.py
@@ -17,11 +17,12 @@
         'Operating System :: OS Independent',
         'Programming Language :: Python',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: 3.12',
+        'Programming Language :: Python :: 3.13',
     ],
-    install_requires=['Scrapy>=1.1.0']
+    install_requires=['Scrapy>=1.1.0'],
+    python_requires='>=3.9',
 )

31 changes: 26 additions & 5 deletions tests/test_deltafetch.py
@@ -9,11 +9,17 @@
 from scrapy.spiders import Spider
 from scrapy.settings import Settings
 from scrapy.exceptions import NotConfigured
-from scrapy.utils.request import request_fingerprint
 from scrapy.utils.python import to_bytes
 from scrapy.statscollectors import StatsCollector
 from scrapy.utils.test import get_crawler

+try:
+    from scrapy.utils.request import request_fingerprint
+    _legacy_fingerprint=True
+except ImportError:
+    from scrapy.utils.request import RequestFingerprinter
+    _legacy_fingerprint=False
+
 from scrapy_deltafetch.middleware import DeltaFetch


@@ -124,7 +130,7 @@ def test_spider_opened_reset_non_existing_db(self):
         self.spider.deltafetch_reset = True
         mw.spider_opened(self.spider)
         assert mw.db.get(b'random') is None
-
+
     def test_spider_opened_recreate(self):
         self._create_test_db()
         mw = self.mwcls(self.temp_dir, reset=True, stats=self.stats)
@@ -185,7 +191,12 @@ def test_process_spider_output(self):

     def test_process_spider_output_with_ignored_request(self):
         self._create_test_db()
-        mw = self.mwcls(self.temp_dir, reset=False, stats=self.stats)
+        settings = {
+            "DELTAFETCH_DIR": self.temp_dir,
+            "DELTAFETCH_ENABLED": True,
+        }
+        crawler = get_crawler(Spider, settings_dict=settings)
+        mw = self.mwcls.from_crawler(crawler)
         mw.spider_opened(self.spider)
         response = mock.Mock()
         response.request = Request('http://url')
@@ -316,10 +327,20 @@ def __init__(self, dir, reset=False, *args, **kwargs):
         self.assertEqual(self.stats.get_value('deltafetch/stored'), None)

     def test_get_key(self):
-        mw = self.mwcls(self.temp_dir, reset=True)
+        settings = {
+            "DELTAFETCH_DIR": self.temp_dir,
+            "DELTAFETCH_ENABLED": True,
+            "DELTAFETCH_RESET": True,
+        }
+        crawler = get_crawler(Spider, settings_dict=settings)
+        mw = self.mwcls.from_crawler(crawler)
         test_req1 = Request('http://url1')
+        if _legacy_fingerprint:
+            fingerprint = request_fingerprint
+        else:
+            fingerprint = RequestFingerprinter.from_crawler(crawler).fingerprint
         self.assertEqual(mw._get_key(test_req1),
-                         to_bytes(request_fingerprint(test_req1)))
+                         to_bytes(fingerprint(test_req1)))
         test_req2 = Request('http://url2', meta={'deltafetch_key': b'dfkey1'})
         self.assertEqual(mw._get_key(test_req2), b'dfkey1')

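The test changes mirror the middleware: rather than instantiating DeltaFetch directly, they build it through a crawler so that from_crawler() can attach the appropriate fingerprint function, and the _legacy_fingerprint flag selects the matching reference implementation for the assertion in test_get_key. A rough usage sketch of that crawler-based setup outside the test suite (the settings values, including the directory path, are placeholders):

from scrapy.spiders import Spider
from scrapy.utils.test import get_crawler

from scrapy_deltafetch.middleware import DeltaFetch

settings = {
    "DELTAFETCH_DIR": "/tmp/deltafetch-db",  # placeholder directory
    "DELTAFETCH_ENABLED": True,              # otherwise from_crawler() raises NotConfigured
    "DELTAFETCH_RESET": True,
}
crawler = get_crawler(Spider, settings_dict=settings)
mw = DeltaFetch.from_crawler(crawler)  # fingerprint callable is chosen here

Going through get_crawler() also means the tests exercise the same code path a real crawl uses, so the compatibility shim itself is covered rather than bypassed.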