Skip to content

Commit 91c1b74

Browse files
authored
Merge pull request #281 from autoscrape-labs/fix/expect-download
Fix expect_download context manager
2 parents c1646c6 + dbfb2d9 commit 91c1b74

File tree

3 files changed

+187
-30
lines changed

3 files changed

+187
-30
lines changed

pydoll/browser/tab.py

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@
5151
)
5252
from pydoll.protocol.base import EmptyResponse, Response
5353
from pydoll.protocol.browser.events import (
54-
BrowserEvent,
5554
DownloadProgressEvent,
5655
DownloadWillBeginEvent,
5756
)
@@ -851,9 +850,13 @@ async def expect_download(
851850
behavior=DownloadBehavior.ALLOW,
852851
download_path=download_dir,
853852
browser_context_id=self._browser_context_id,
854-
events_enabled=True,
855853
)
856854

855+
_page_events_was_enabled = True
856+
if not self._page_events_enabled:
857+
_page_events_was_enabled = False
858+
await self.enable_page_events()
859+
857860
loop = asyncio.get_event_loop()
858861
will_begin: asyncio.Future[bool] = loop.create_future()
859862
done: asyncio.Future[bool] = loop.create_future()
@@ -889,13 +892,13 @@ async def on_progress(event: DownloadProgressEvent):
889892
if not done.done():
890893
done.set_result(True)
891894

892-
cb_id_will_begin = await self.on(
893-
BrowserEvent.DOWNLOAD_WILL_BEGIN,
895+
await self.on(
896+
PageEvent.DOWNLOAD_WILL_BEGIN,
894897
cast(Callable[[dict], Awaitable[Any]], on_will_begin),
895898
True,
896899
)
897900
cb_id_progress = await self.on(
898-
BrowserEvent.DOWNLOAD_PROGRESS,
901+
PageEvent.DOWNLOAD_PROGRESS,
899902
cast(Callable[[dict], Awaitable[Any]], on_progress),
900903
False,
901904
)
@@ -914,19 +917,37 @@ async def on_progress(event: DownloadProgressEvent):
914917
except asyncio.TimeoutError as exc:
915918
raise DownloadTimeout() from exc
916919
finally:
917-
await self.remove_callback(cb_id_progress)
918-
await self.remove_callback(cb_id_will_begin)
919-
await self._browser.set_download_behavior(
920-
behavior=DownloadBehavior.DEFAULT,
921-
browser_context_id=self._browser_context_id,
920+
await self._cleanup_download_context(
921+
cb_id_progress,
922+
_page_events_was_enabled,
923+
cleanup_dir,
924+
state,
925+
download_dir,
922926
)
923927

924-
if cleanup_dir:
925-
file_path = state['filePath']
926-
if not file_path:
927-
return
928-
Path(file_path).unlink(missing_ok=True)
929-
shutil.rmtree(download_dir, ignore_errors=True)
928+
async def _cleanup_download_context(
929+
self,
930+
cb_id_progress: int,
931+
page_events_was_enabled: bool,
932+
cleanup_dir: bool,
933+
state: dict[str, Any],
934+
download_dir: str,
935+
) -> None:
936+
await self.remove_callback(cb_id_progress)
937+
await self._browser.set_download_behavior(
938+
behavior=DownloadBehavior.DEFAULT,
939+
browser_context_id=self._browser_context_id,
940+
)
941+
942+
if cleanup_dir:
943+
file_path = state['filePath']
944+
if not file_path:
945+
return
946+
Path(file_path).unlink(missing_ok=True)
947+
shutil.rmtree(download_dir, ignore_errors=True)
948+
949+
if not page_events_was_enabled:
950+
await self.disable_page_events()
930951

931952
@overload
932953
async def on(

pydoll/protocol/page/events.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ class PageEvent(str, Enum):
273273
Args:
274274
visible (bool): True if the page is visible.
275275
"""
276+
DOWNLOAD_WILL_BEGIN = 'Page.downloadWillBegin'
277+
DOWNLOAD_PROGRESS = 'Page.downloadProgress'
276278

277279

278280
class DomContentEventFiredEventParams(TypedDict):

tests/test_browser/test_browser_tab.py

Lines changed: 148 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pydoll.protocol.fetch.types import RequestStage
1212
from pydoll.constants import By
1313
from pydoll.browser.tab import Tab
14-
from pydoll.protocol.browser.events import BrowserEvent
14+
from pydoll.protocol.page.events import PageEvent
1515
from pydoll.protocol.browser.types import DownloadBehavior
1616
from pydoll.exceptions import DownloadTimeout, InvalidTabInitialization
1717
from pydoll.exceptions import (
@@ -1077,13 +1077,13 @@ async def test_expect_download_keeps_file_when_path_provided(self, tab, tmp_path
10771077

10781078
async def fake_on(event_name, handler, temporary=False):
10791079
handlers[event_name] = handler
1080-
return 100 if event_name == BrowserEvent.DOWNLOAD_WILL_BEGIN else 101
1080+
return 100 if event_name == PageEvent.DOWNLOAD_WILL_BEGIN else 101
10811081

10821082
with patch.object(tab, 'on', fake_on):
10831083
async with tab.expect_download(keep_file_at=str(target_dir)) as download:
10841084
# Simulate willBegin
1085-
await handlers[BrowserEvent.DOWNLOAD_WILL_BEGIN]({
1086-
'method': BrowserEvent.DOWNLOAD_WILL_BEGIN,
1085+
await handlers[PageEvent.DOWNLOAD_WILL_BEGIN]({
1086+
'method': PageEvent.DOWNLOAD_WILL_BEGIN,
10871087
'params': {
10881088
'frameId': 'frame-1',
10891089
'guid': 'guid-1',
@@ -1092,8 +1092,8 @@ async def fake_on(event_name, handler, temporary=False):
10921092
}
10931093
})
10941094
# Simulate progress Completed without filePath (fallback to suggested)
1095-
await handlers[BrowserEvent.DOWNLOAD_PROGRESS]({
1096-
'method': BrowserEvent.DOWNLOAD_PROGRESS,
1095+
await handlers[PageEvent.DOWNLOAD_PROGRESS]({
1096+
'method': PageEvent.DOWNLOAD_PROGRESS,
10971097
'params': {
10981098
'guid': 'guid-1',
10991099
'totalBytes': 10,
@@ -1122,14 +1122,14 @@ async def test_expect_download_timeout_raises(self, tab, tmp_path):
11221122

11231123
async def fake_on(event_name, handler, temporary=False):
11241124
handlers[event_name] = handler
1125-
return 200 if event_name == BrowserEvent.DOWNLOAD_WILL_BEGIN else 201
1125+
return 200 if event_name == PageEvent.DOWNLOAD_WILL_BEGIN else 201
11261126

11271127
with patch.object(tab, 'on', fake_on):
11281128
with pytest.raises(DownloadTimeout):
11291129
async with tab.expect_download(keep_file_at=str(tmp_path), timeout=0.01):
11301130
# Trigger will begin but never complete
1131-
await handlers[BrowserEvent.DOWNLOAD_WILL_BEGIN]({
1132-
'method': BrowserEvent.DOWNLOAD_WILL_BEGIN,
1131+
await handlers[PageEvent.DOWNLOAD_WILL_BEGIN]({
1132+
'method': PageEvent.DOWNLOAD_WILL_BEGIN,
11331133
'params': {
11341134
'frameId': 'frame-1',
11351135
'guid': 'guid-2',
@@ -1147,22 +1147,22 @@ async def test_expect_download_cleans_temp_directory(self, tab, tmp_path):
11471147

11481148
async def fake_on(event_name, handler, temporary=False):
11491149
handlers[event_name] = handler
1150-
return 300 if event_name == BrowserEvent.DOWNLOAD_WILL_BEGIN else 301
1150+
return 300 if event_name == PageEvent.DOWNLOAD_WILL_BEGIN else 301
11511151

11521152
with patch.object(tab, 'on', fake_on):
11531153
# Use None to create temp dir and ensure cleanup occurs
11541154
async with tab.expect_download(keep_file_at=None) as download:
1155-
await handlers[BrowserEvent.DOWNLOAD_WILL_BEGIN]({
1156-
'method': BrowserEvent.DOWNLOAD_WILL_BEGIN,
1155+
await handlers[PageEvent.DOWNLOAD_WILL_BEGIN]({
1156+
'method': PageEvent.DOWNLOAD_WILL_BEGIN,
11571157
'params': {
11581158
'frameId': 'frame-1',
11591159
'guid': 'guid-3',
11601160
'url': 'https://example.com/tmp.txt',
11611161
'suggestedFilename': 'tmp.txt',
11621162
}
11631163
})
1164-
await handlers[BrowserEvent.DOWNLOAD_PROGRESS]({
1165-
'method': BrowserEvent.DOWNLOAD_PROGRESS,
1164+
await handlers[PageEvent.DOWNLOAD_PROGRESS]({
1165+
'method': PageEvent.DOWNLOAD_PROGRESS,
11661166
'params': {
11671167
'guid': 'guid-3',
11681168
'totalBytes': 3,
@@ -1182,6 +1182,140 @@ async def fake_on(event_name, handler, temporary=False):
11821182
# We cannot know the exact temp dir path (random), but ensure file is gone
11831183
assert not file_path.exists()
11841184

1185+
@pytest.mark.asyncio
1186+
async def test_expect_download_ignores_progress_with_different_guid(self, tab, tmp_path):
1187+
tab._browser.set_download_behavior = AsyncMock()
1188+
1189+
handlers = {}
1190+
1191+
async def fake_on(event_name, handler, temporary=False):
1192+
handlers[event_name] = handler
1193+
return 400 if event_name == PageEvent.DOWNLOAD_WILL_BEGIN else 401
1194+
1195+
with patch.object(tab, 'on', fake_on):
1196+
async with tab.expect_download(keep_file_at=str(tmp_path)) as download:
1197+
await handlers[PageEvent.DOWNLOAD_WILL_BEGIN]({
1198+
'method': PageEvent.DOWNLOAD_WILL_BEGIN,
1199+
'params': {
1200+
'frameId': 'frame-1',
1201+
'guid': 'guid-x',
1202+
'url': 'https://example.com/file.bin',
1203+
'suggestedFilename': 'file.bin',
1204+
}
1205+
})
1206+
1207+
# Wrong guid should be ignored and not mark as done
1208+
await handlers[PageEvent.DOWNLOAD_PROGRESS]({
1209+
'method': PageEvent.DOWNLOAD_PROGRESS,
1210+
'params': {
1211+
'guid': 'wrong-guid',
1212+
'totalBytes': 1,
1213+
'receivedBytes': 1,
1214+
'state': 'completed',
1215+
}
1216+
})
1217+
1218+
# Still not finished
1219+
assert download.file_path is None
1220+
1221+
# Correct guid completes
1222+
await handlers[PageEvent.DOWNLOAD_PROGRESS]({
1223+
'method': PageEvent.DOWNLOAD_PROGRESS,
1224+
'params': {
1225+
'guid': 'guid-x',
1226+
'totalBytes': 10,
1227+
'receivedBytes': 10,
1228+
'state': 'completed',
1229+
'filePath': str(tmp_path / 'file.bin'),
1230+
}
1231+
})
1232+
1233+
await download.wait_finished()
1234+
1235+
@pytest.mark.asyncio
1236+
async def test_expect_download_page_events_auto_enable_disable(self, tab, tmp_path):
1237+
"""When page events are disabled, expect_download should enable and then disable them."""
1238+
tab._browser.set_download_behavior = AsyncMock()
1239+
tab._page_events_enabled = False
1240+
1241+
enable_page_events = AsyncMock()
1242+
disable_page_events = AsyncMock()
1243+
1244+
handlers = {}
1245+
1246+
async def fake_on(event_name, handler, temporary=False):
1247+
handlers[event_name] = handler
1248+
return 500 if event_name == PageEvent.DOWNLOAD_WILL_BEGIN else 501
1249+
1250+
with patch.object(tab, 'enable_page_events', enable_page_events), \
1251+
patch.object(tab, 'disable_page_events', disable_page_events), \
1252+
patch.object(tab, 'on', fake_on):
1253+
async with tab.expect_download(keep_file_at=str(tmp_path)):
1254+
await handlers[PageEvent.DOWNLOAD_WILL_BEGIN]({
1255+
'method': PageEvent.DOWNLOAD_WILL_BEGIN,
1256+
'params': {
1257+
'frameId': 'frame-1',
1258+
'guid': 'guid-y',
1259+
'url': 'https://example.com/auto.bin',
1260+
'suggestedFilename': 'auto.bin',
1261+
}
1262+
})
1263+
await handlers[PageEvent.DOWNLOAD_PROGRESS]({
1264+
'method': PageEvent.DOWNLOAD_PROGRESS,
1265+
'params': {
1266+
'guid': 'guid-y',
1267+
'totalBytes': 2,
1268+
'receivedBytes': 2,
1269+
'state': 'completed',
1270+
'filePath': str(tmp_path / 'auto.bin'),
1271+
}
1272+
})
1273+
1274+
enable_page_events.assert_awaited_once()
1275+
disable_page_events.assert_awaited_once()
1276+
1277+
@pytest.mark.asyncio
1278+
async def test_expect_download_keeps_page_events_enabled_when_already_enabled(self, tab, tmp_path):
1279+
"""When page events already enabled, expect_download should not disable them on exit."""
1280+
tab._browser.set_download_behavior = AsyncMock()
1281+
tab._page_events_enabled = True
1282+
1283+
enable_page_events = AsyncMock()
1284+
disable_page_events = AsyncMock()
1285+
1286+
handlers = {}
1287+
1288+
async def fake_on(event_name, handler, temporary=False):
1289+
handlers[event_name] = handler
1290+
return 600 if event_name == PageEvent.DOWNLOAD_WILL_BEGIN else 601
1291+
1292+
with patch.object(tab, 'enable_page_events', enable_page_events), \
1293+
patch.object(tab, 'disable_page_events', disable_page_events), \
1294+
patch.object(tab, 'on', fake_on):
1295+
async with tab.expect_download(keep_file_at=str(tmp_path)):
1296+
await handlers[PageEvent.DOWNLOAD_WILL_BEGIN]({
1297+
'method': PageEvent.DOWNLOAD_WILL_BEGIN,
1298+
'params': {
1299+
'frameId': 'frame-1',
1300+
'guid': 'guid-z',
1301+
'url': 'https://example.com/enabled.bin',
1302+
'suggestedFilename': 'enabled.bin',
1303+
}
1304+
})
1305+
await handlers[PageEvent.DOWNLOAD_PROGRESS]({
1306+
'method': PageEvent.DOWNLOAD_PROGRESS,
1307+
'params': {
1308+
'guid': 'guid-z',
1309+
'totalBytes': 2,
1310+
'receivedBytes': 2,
1311+
'state': 'completed',
1312+
'filePath': str(tmp_path / 'enabled.bin'),
1313+
}
1314+
})
1315+
1316+
enable_page_events.assert_not_awaited()
1317+
disable_page_events.assert_not_awaited()
1318+
11851319
@pytest.mark.asyncio
11861320
async def test_disable_auto_solve_cloudflare_captcha(self, tab):
11871321
"""Test disabling auto-solve Cloudflare captcha."""

0 commit comments

Comments
 (0)