Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from setuptools import setup
import os
from setuptools import setup

VERSION = "1.5"

Expand Down
57 changes: 24 additions & 33 deletions shot_scraper/cli.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import click
from click_default_group import DefaultGroup
import json
import os
import pathlib
from playwright.sync_api import sync_playwright, Error, TimeoutError
from runpy import run_module
import secrets
import subprocess
import sys
import textwrap
import time
import json
import os
import pathlib
from runpy import run_module
from click_default_group import DefaultGroup
import yaml
import click
from playwright.sync_api import sync_playwright, Error, TimeoutError


from shot_scraper.utils import filename_for_url, url_or_file_path

Expand Down Expand Up @@ -88,15 +89,13 @@ def skip_or_fail(response, skip, fail):
if str(response.status)[0] in ("4", "5"):
if skip:
click.echo(
"{} error for {}, skipping".format(response.status, response.url),
f"{response.status} error for {response.url}, skipping",
err=True,
)
# Exit with a 0 status code
raise SystemExit
elif fail:
raise click.ClickException(
"{} error for {}".format(response.status, response.url)
)
raise click.ClickException(f"{response.status} error for {response.url}")


def scale_factor_options(fn):
Expand Down Expand Up @@ -1024,7 +1023,7 @@ def pdf(
if output == "-":
sys.stdout.buffer.write(pdf)
elif not silent:
click.echo("PDF of '{}' written to '{}'".format(url, output), err=True)
click.echo(f"PDF of '{url}' written to '{output}'", err=True)

browser_obj.close()

Expand Down Expand Up @@ -1124,7 +1123,7 @@ def html(
open(output, "w").write(html)
if not silent:
click.echo(
"HTML snapshot of '{}' written to '{}'".format(url, output),
f"HTML snapshot of '{url}' written to '{output}'",
err=True,
)

Expand Down Expand Up @@ -1309,14 +1308,10 @@ def on_response(response):
# Check if page was a 404 or 500 or other error
if str(response.status)[0] in ("4", "5"):
if skip:
click.echo(
"{} error for {}, skipping".format(response.status, url), err=True
)
click.echo(f"{response.status} error for {url}, skipping", err=True)
return
elif fail:
raise click.ClickException(
"{} error for {}".format(response.status, url)
)
raise click.ClickException(f"{response.status} error for {url}")

if wait:
time.sleep(wait / 1000)
Expand Down Expand Up @@ -1365,9 +1360,7 @@ def on_response(response):
bytes_ = page.locator(selector_to_shoot).screenshot(**screenshot_args)
except TimeoutError as e:
raise click.ClickException(
"Timed out while waiting for element to become available.\n\n{}".format(
e
)
f"Timed out while waiting for element to become available.\n\n{e}"
)
if return_bytes:
return bytes_
Expand All @@ -1385,7 +1378,7 @@ def on_response(response):
return page.screenshot(**screenshot_args)
else:
page.screenshot(**screenshot_args)
message = "Screenshot of '{}' written to '{}'".format(url, output)
message = f"Screenshot of '{url}' written to '{output}'"

if not silent:
click.echo(message, err=True)
Expand All @@ -1396,22 +1389,20 @@ def _js_selector_javascript(js_selectors, js_selectors_all):
extra_selectors_all = []
js_blocks = []
for js_selector in js_selectors:
klass = "js-selector-{}".format(secrets.token_hex(16))
extra_selectors.append(".{}".format(klass))
klass = f"js-selector-{secrets.token_hex(16)}"
extra_selectors.append(f".{klass}")
js_blocks.append(
textwrap.dedent(
"""
f"""
Array.from(
document.getElementsByTagName('*')
).find(el => {}).classList.add("{}");
""".format(
js_selector, klass
)
).find(el => {js_selector}).classList.add("{klass}");
"""
)
)
for js_selector_all in js_selectors_all:
klass = "js-selector-all-{}".format(secrets.token_hex(16))
extra_selectors_all.append(".{}".format(klass))
klass = f"js-selector-all-{secrets.token_hex(16)}"
extra_selectors_all.append(f".{klass}")
js_blocks.append(
textwrap.dedent(
"""
Expand All @@ -1428,7 +1419,7 @@ def _js_selector_javascript(js_selectors, js_selectors_all):


def _selector_javascript(selectors, selectors_all, padding=0):
selector_to_shoot = "shot-scraper-{}".format(secrets.token_hex(8))
selector_to_shoot = f"shot-scraper-{secrets.token_hex(8)}"
selector_javascript = textwrap.dedent(
"""
new Promise(takeShot => {
Expand Down
6 changes: 3 additions & 3 deletions shot_scraper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ def filename_for_url(url, ext=None, file_exists=file_exists_never):
suffix = 0
while file_exists(filename):
suffix += 1
filename = "{}.{}.{}".format(base_filename, suffix, ext)
filename = f"{base_filename}.{suffix}.{ext}"
return filename


def url_or_file_path(url, file_exists=file_exists_never):
# If url exists as a file, convert that to file:/
file_path = file_exists(url)
if file_path:
return "file:{}".format(file_path)
return f"file:{file_path}"
if not (url.startswith("http://") or url.startswith("https://")):
return "http://{}".format(url)
return f"http://{url}"
return url
4 changes: 2 additions & 2 deletions tests/test_shot_scraper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from click.testing import CliRunner
import pathlib
import pytest
import textwrap
from click.testing import CliRunner
import pytest
from shot_scraper.cli import cli
import zipfile
import json
Expand Down
3 changes: 1 addition & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from shot_scraper.utils import filename_for_url
import pytest

from shot_scraper.utils import filename_for_url

@pytest.mark.parametrize(
"url,ext,expected",
Expand Down
Loading