Skip to content

Commit 97ce5dd

Browse files
authored
Feature: add http caching mechanism (#309)
This PR aims to improve the plugin's performance regarding the HTTP requests which are performed to retrieve the length of remote images: - add a new option `cache_dir` - rely on https://pypi.org/project/CacheControl/ to manage the local cache of requests. For now, it works only for GET requests. See upstream issue: psf/cachecontrol#337
2 parents 96c0404 + 96fec9f commit 97ce5dd

File tree

8 files changed

+99
-9
lines changed

8 files changed

+99
-9
lines changed

docs/configuration.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,28 @@ Default: `<!-- more -->`
195195

196196
----
197197

198+
### :material-recycle: `cache_dir`: folder where to store plugin's cached files { #cache_dir }
199+
200+
The plugin implements a caching mechanism, ensuring that a remote media file is only fetched once during its life-cycle on the remote HTTP server (using [Cache Control](https://pypi.org/project/CacheControl/) under the hood). It is normally not necessary to specify this setting, except when you want to change the path within your root directory where HTTP bodies and metadata files are cached.
201+
202+
If you want to change it, use:
203+
204+
``` yaml
205+
plugins:
206+
- rss:
207+
cache_dir: my/custom/dir
208+
```
209+
210+
It's strongly recommended to add the path to your `.gitignore` file in the root of your project:
211+
212+
``` title=".gitignore"
213+
.cache
214+
```
215+
216+
Default: `.cache/plugins/rss`.
217+
218+
----
219+
198220
### :material-tag-multiple: `categories`: item categories { #categories }
199221

200222
`categories`: list of page metadata values to use as [RSS item categories](https://www.w3schools.com/xml/rss_tag_category_item.asp).

mkdocs_rss_plugin/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
from mkdocs.config import config_options
99
from mkdocs.config.base import Config
1010

11+
# package
12+
from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER
13+
1114
# ############################################################################
1215
# ########## Classes ###############
1316
# ##################################
@@ -42,6 +45,7 @@ class RssPluginConfig(Config):
4245
categories = config_options.Optional(
4346
config_options.ListOfItems(config_options.Type(str))
4447
)
48+
cache_dir = config_options.Type(str, default=f"{DEFAULT_CACHE_FOLDER.resolve()}")
4549
comments_path = config_options.Optional(config_options.Type(str))
4650
date_from_meta = config_options.SubConfig(_DateFromMeta)
4751
enabled = config_options.Type(bool, default=True)

mkdocs_rss_plugin/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# ########## Globals #############
1515
# ################################
1616

17+
DEFAULT_CACHE_FOLDER = Path(".cache/plugins/rss")
1718
DEFAULT_TEMPLATE_FOLDER = Path(__file__).parent / "templates"
1819
DEFAULT_TEMPLATE_FILENAME = DEFAULT_TEMPLATE_FOLDER / "rss.xml.jinja2"
1920
MKDOCS_LOGGER_NAME = "[RSS-plugin]"

mkdocs_rss_plugin/plugin.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
9292
self.config.enabled = False
9393
return config
9494

95+
# cache dir
96+
self.cache_dir = Path(self.config.cache_dir)
97+
self.cache_dir.mkdir(parents=True, exist_ok=True)
98+
logger.debug(f"Caching HTTP requests to: {self.cache_dir.resolve()}")
99+
95100
# integrations - check if theme is Material and if social cards are enabled
96101
self.integration_material_social_cards = IntegrationMaterialSocialCards(
97102
mkdocs_config=config,
@@ -100,6 +105,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
100105

101106
# instantiate plugin tooling
102107
self.util = Util(
108+
cache_dir=self.cache_dir,
103109
use_git=self.config.use_git,
104110
integration_material_social_cards=self.integration_material_social_cards,
105111
)
@@ -169,10 +175,6 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:
169175
self.config.date_from_meta.default_time = datetime.strptime(
170176
self.config.date_from_meta.default_time, "%H:%M"
171177
)
172-
print(
173-
self.config.date_from_meta.default_time,
174-
type(self.config.date_from_meta.default_time),
175-
)
176178
except (TypeError, ValueError) as err:
177179
logger.warning(
178180
"Config error: `date_from_meta.default_time` value "

mkdocs_rss_plugin/util.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
# 3rd party
2020
import markdown
2121
import urllib3
22+
from cachecontrol import CacheControl
23+
from cachecontrol.caches.file_cache import SeparateBodyFileCache
2224
from git import (
2325
GitCommandError,
2426
GitCommandNotFound,
@@ -34,7 +36,11 @@
3436
from requests.exceptions import ConnectionError, HTTPError
3537

3638
# package
37-
from mkdocs_rss_plugin.constants import MKDOCS_LOGGER_NAME, REMOTE_REQUEST_HEADERS
39+
from mkdocs_rss_plugin.constants import (
40+
DEFAULT_CACHE_FOLDER,
41+
MKDOCS_LOGGER_NAME,
42+
REMOTE_REQUEST_HEADERS,
43+
)
3844
from mkdocs_rss_plugin.git_manager.ci import CiHandler
3945
from mkdocs_rss_plugin.integrations.theme_material_social_plugin import (
4046
IntegrationMaterialSocialCards,
@@ -67,6 +73,7 @@ class Util:
6773
def __init__(
6874
self,
6975
path: str = ".",
76+
cache_dir: Path = DEFAULT_CACHE_FOLDER,
7077
use_git: bool = True,
7178
integration_material_social_cards: Optional[
7279
IntegrationMaterialSocialCards
@@ -122,8 +129,13 @@ def __init__(
122129
self.social_cards = integration_material_social_cards
123130

124131
# http/s session
125-
self.req_session = Session()
126-
self.req_session.headers.update(REMOTE_REQUEST_HEADERS)
132+
session = Session()
133+
session.headers.update(REMOTE_REQUEST_HEADERS)
134+
self.req_session = CacheControl(
135+
sess=session,
136+
cache=SeparateBodyFileCache(directory=cache_dir),
137+
cacheable_methods=("GET", "HEAD"),
138+
)
127139

128140
def build_url(
129141
self, base_url: str, path: str, args_dict: Optional[dict] = None

requirements/base.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Common requirements
22
# -----------------------
33

4-
4+
cachecontrol[filecache] >=0.14,<1
55
GitPython>=3.1,<3.2
66
mkdocs>=1.5,<2
77
requests>=2.31,<3

tests/dev/dev_cached_http.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import http.client
2+
import logging
3+
from pathlib import Path
4+
5+
import requests
6+
from cachecontrol import CacheControl
7+
from cachecontrol.caches.file_cache import FileCache
8+
9+
http.client.HTTPConnection.debuglevel = 1
10+
logging.basicConfig()
11+
logging.getLogger().setLevel(logging.DEBUG)
12+
req_log = logging.getLogger("requests.packages.urllib3")
13+
req_log.setLevel(logging.DEBUG)
14+
req_log.propagate = True
15+
16+
17+
sess = CacheControl(
18+
requests.Session(), cache=FileCache(".web_cache"), cacheable_methods=("HEAD", "GET")
19+
)
20+
21+
22+
# get requests
23+
resp = sess.get("https://geotribu.fr")
24+
resp_img = sess.get(
25+
"https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png"
26+
)
27+
28+
# try again, cache hit expected
29+
resp = sess.get("https://geotribu.fr")
30+
resp_img = sess.get(
31+
"https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png"
32+
)
33+
34+
# head requests
35+
resp_img = sess.head(
36+
"https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png"
37+
)
38+
39+
40+
# try again, cache hit expected
41+
resp_img = sess.head(
42+
"https://cdn.geotribu.fr/img/articles-blog-rdp/capture-ecran/kevish_Air-Traffic.png"
43+
)
44+
45+
print(list(Path(".web_cache").iterdir()))

tests/test_config.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
# 3rd party
2222
from mkdocs.config.base import Config
2323

24-
# plugin target
2524
from mkdocs_rss_plugin.config import RssPluginConfig
25+
26+
# plugin target
27+
from mkdocs_rss_plugin.constants import DEFAULT_CACHE_FOLDER
2628
from mkdocs_rss_plugin.plugin import GitRssPlugin
2729

2830
# test suite
@@ -62,6 +64,7 @@ def test_plugin_config_defaults(self):
6264
"abstract_chars_count": 160,
6365
"abstract_delimiter": "<!-- more -->",
6466
"categories": None,
67+
"cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}",
6568
"comments_path": None,
6669
"date_from_meta": {
6770
"as_creation": "git",
@@ -105,6 +108,7 @@ def test_plugin_config_image(self):
105108
expected = {
106109
"abstract_chars_count": 160,
107110
"abstract_delimiter": "<!-- more -->",
111+
"cache_dir": f"{DEFAULT_CACHE_FOLDER.resolve()}",
108112
"categories": None,
109113
"comments_path": None,
110114
"date_from_meta": {

0 commit comments

Comments
 (0)