|
6 | 6 |
|
7 | 7 | # standard library
|
8 | 8 | import logging
|
9 |
| -import ssl |
10 | 9 | from collections.abc import Iterable
|
11 | 10 | from datetime import date, datetime
|
12 | 11 | from email.utils import format_datetime
|
13 | 12 | from mimetypes import guess_type
|
14 | 13 | from pathlib import Path
|
15 | 14 | from typing import Any
|
16 |
| -from urllib import request |
17 |
| -from urllib.error import HTTPError, URLError |
18 | 15 | from urllib.parse import urlencode, urlparse, urlunparse
|
19 | 16 |
|
20 | 17 | # 3rd party
|
|
24 | 21 | from mkdocs.plugins import get_plugin_logger
|
25 | 22 | from mkdocs.structure.pages import Page
|
26 | 23 | from mkdocs.utils import get_build_datetime
|
| 24 | +from requests import Session |
| 25 | +from requests.exceptions import HTTPError |
27 | 26 |
|
28 | 27 | # package
|
29 | 28 | from mkdocs_rss_plugin.constants import MKDOCS_LOGGER_NAME, REMOTE_REQUEST_HEADERS
|
@@ -106,7 +105,11 @@ def __init__(
|
106 | 105 | # save integrations
|
107 | 106 | self.social_cards = integration_material_social_cards
|
108 | 107 |
|
109 |
| - def build_url(self, base_url: str, path: str, args_dict: dict = None) -> str: |
| 108 | + # http/s session |
| 109 | + self.req_session = Session() |
| 110 | + self.req_session.headers.update(REMOTE_REQUEST_HEADERS) |
| 111 | + |
| 112 | + def build_url(self, base_url: str, path: str, args_dict: dict | None = None) -> str: |
110 | 113 | """Build URL using base URL, cumulating existing and passed path, \
|
111 | 114 | then adding URL arguments.
|
112 | 115 |
|
@@ -604,51 +607,44 @@ def get_remote_image_length(
|
604 | 607 | image_url: str,
|
605 | 608 | http_method: str = "HEAD",
|
606 | 609 | attempt: int = 0,
|
607 |
| - ssl_context: ssl.SSLContext = None, |
| 610 | + ssl_verify: bool = True, |
608 | 611 | ) -> int | None:
|
609 |
| - """Retrieve length for remote images (starting with 'http' \ |
610 |
| - in meta.image or meta.illustration). \ |
611 |
| - It tries to perform a HEAD request and get the length from the headers. \ |
612 |
| - If it fails, it tries again with a GET and disabling SSL verification. |
613 |
| -
|
614 |
| - :param image_url: remote image URL |
615 |
| - :type image_url: str |
616 |
| - :param http_method: HTTP method used to perform request, defaults to "HEAD" |
617 |
| - :type http_method: str, optional |
618 |
| - :param attempt: request tries counter, defaults to 0 |
619 |
| - :type attempt: int, optional |
620 |
| - :param ssl_context: SSL context, defaults to None |
621 |
| - :type ssl_context: ssl.SSLContext, optional |
622 |
| -
|
623 |
| - :return: image length as str or None |
624 |
| - :rtype: Optional[int] |
| 612 | + """Retrieve length for remote images (starting with 'http'). |
| 613 | +
|
| 614 | + Firstly, it tries to perform a HEAD request and get the length from the headers. \ |
| 615 | + If it fails, it tries again with a GET and disabling SSL verification. |
| 616 | +
|
| 617 | + Args: |
| 618 | + image_url (str): image URL |
| 619 | + http_method (str, optional): HTTP method to use for the request. |
| 620 | + Defaults to "HEAD". |
| 621 | + attempt (int, optional): request tries counter. Defaults to 0. |
| 622 | + ssl_verify (bool, optional): option to perform SSL verification or not. |
| 623 | + Defaults to True. |
| 624 | +
|
| 625 | + Returns: |
| 626 | + int | None: image length as int or None |
625 | 627 | """
|
626 |
| - # prepare request |
627 |
| - req = request.Request( |
628 |
| - image_url, |
629 |
| - method=http_method, |
630 |
| - headers=REMOTE_REQUEST_HEADERS, |
631 |
| - ) |
632 | 628 | # first, try HEAD request to avoid downloading the image
|
633 | 629 | try:
|
634 | 630 | attempt += 1
|
635 |
| - remote_img = request.urlopen(url=req, context=ssl_context) |
636 |
| - img_length = remote_img.getheader("content-length") |
637 |
| - except (HTTPError, URLError) as err: |
638 |
| - logging.warning( |
| 631 | + req_response = self.req_session.request( |
| 632 | + method=http_method, url=image_url, verify=ssl_verify |
| 633 | + ) |
| 634 | + req_response.raise_for_status() |
| 635 | + img_length = req_response.headers.get("content-length") |
| 636 | + except HTTPError as err: |
| 637 | + logger.debug( |
639 | 638 | f"Remote image could not been reached: {image_url}. "
|
640 | 639 | f"Trying again with GET and disabling SSL verification. Attempt: {attempt}. "
|
641 | 640 | f"Trace: {err}"
|
642 | 641 | )
|
643 | 642 | if attempt < 2:
|
644 | 643 | return self.get_remote_image_length(
|
645 |
| - image_url, |
646 |
| - http_method="GET", |
647 |
| - attempt=attempt, |
648 |
| - ssl_context=ssl._create_unverified_context(), |
| 644 | + image_url, http_method="GET", attempt=attempt, ssl_verify=False |
649 | 645 | )
|
650 | 646 | else:
|
651 |
| - logging.error( |
| 647 | + logger.info( |
652 | 648 | f"Remote image is not reachable: {image_url} after "
|
653 | 649 | f"{attempt} attempts. Trace: {err}"
|
654 | 650 | )
|
|
0 commit comments