Skip to content

Commit f1a0e75

Browse files
committed
Upload images referenced in document in one go
1 parent 5bb4e19 commit f1a0e75

File tree

5 files changed

+133
-8
lines changed

5 files changed

+133
-8
lines changed

CHANGES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@
1111
- Add HubSpot "BlogPost" wrapper and "upload" feature
1212
- Add HubSpot "File" wrapper, with refactoring
1313
- Add "delete" operations for both "BlogPost", and "File" entities
14-
14+
- Upload images referenced in document in one go
1515

1616
## 2023/09/xx 0.0.0

hubspot_tech_writing/core.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import functools
12
import logging
23
import typing as t
34
import warnings
@@ -12,6 +13,7 @@
1213
from hubspot_tech_writing.html import postprocess
1314
from hubspot_tech_writing.hubspot_api import HubSpotAdapter, HubSpotBlogPost, HubSpotFile
1415
from hubspot_tech_writing.util.common import ContentTypeResolver
16+
from hubspot_tech_writing.util.html import HTMLImageTranslator
1517
from hubspot_tech_writing.util.io import to_io
1618

1719
logger = logging.getLogger(__name__)
@@ -80,31 +82,46 @@ def upload(
8082
logger.info(f"Uploading file: {source}")
8183
hsa = HubSpotAdapter(access_token=access_token)
8284

83-
# Convert markup to HTML.
85+
# Upload text files as blog posts.
8486
if ctr.is_text():
87+
# Convert markup to HTML.
8588
if ctr.is_markup():
8689
html = convert(source)
8790
elif ctr.is_html():
8891
html = Path(source).read_text()
8992
else:
9093
raise ValueError(f"Unknown file type: {ctr.suffix}")
9194

95+
# Collect and converge images.
96+
if not folder_id and not folder_path:
97+
logger.warning("Images will not be uploaded, please supply folder id or folder name")
98+
else:
99+
uploader = functools.partial(
100+
upload, access_token=access_token, folder_id=folder_id, folder_path=folder_path
101+
)
102+
hit = HTMLImageTranslator(html=html, source_path=source_path, uploader=uploader)
103+
hit.discover().process()
104+
html = hit.html_out
105+
106+
# Upload blog post.
92107
name = name or source_path.stem
93108
article = HubSpotBlogPost(hubspot_adapter=hsa, name=name, content_group_id=content_group_id)
94109
post: BlogPost = article.post
95110
post.post_body = html
96-
article.save()
111+
return article.save()
97112

98113
# Only in emergency situations.
99114
# article.delete() # noqa: ERA001
100115

101-
elif ctr.is_file():
116+
# Upload other files as File objects.
117+
elif ctr.is_file(): # noqa: RET505
102118
name = name or source_path.name
103119
file = HubSpotFile(hubspot_adapter=hsa, source=source, name=name, folder_id=folder_id, folder_path=folder_path)
104-
file.save()
120+
return file.save()
121+
return None
105122

106-
# Only in emergency situations.
107-
# file.delete() # noqa: ERA001
123+
# Only in emergency situations.
124+
# file.delete() # noqa: ERA001
108125

109126

110127
def delete_blogpost(access_token: str, identifier: t.Optional[str] = None, name: t.Optional[str] = None):

hubspot_tech_writing/hubspot_api.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,10 @@ def __init__(
284284
self.load()
285285

286286
def __str__(self):
287-
return f"{self.__class__.__name__} identifier={self.identifier}, name={self.name}"
287+
return (
288+
f"{self.__class__.__name__} identifier={self.identifier}, "
289+
f"name={self.name}, folder={self.folder_id or self.folder_path}"
290+
)
288291

289292
def load(self):
290293
"""

hubspot_tech_writing/util/html.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import dataclasses
import logging
import re
import typing as t
from copy import deepcopy
from pathlib import Path

from bs4 import BeautifulSoup
9+
logger = logging.getLogger(__name__)
10+
11+
12+
@dataclasses.dataclass
class HTMLImage:
    """
    Reference to an image, reduced to the two attributes of an `<img>` element
    that are read when scanning a document.
    """

    # Alternative text. `None` when the element has no `alt` attribute.
    alt: t.Optional[str]

    # Location of the image, i.e. the value of the `src` attribute.
    src: str
16+
17+
18+
class HTMLImageTranslator:
    """
    Translate local image references into remote ones, by uploading them.
    After that, replace URLs in HTML document.

    Typical usage is `translator.discover().process()`. Afterwards, the
    rewritten document is available on the `html_out` attribute.

    :param html: The HTML document to process.
    :param source_path: Path of the original document, or of its directory.
        Relative image references are resolved against it. When `None`,
        no image is considered local, and nothing is uploaded.
    :param uploader: Callable invoked as `uploader(source=..., name=...)` per
        local image file. Its return value must provide a `url` attribute.
    """

    # Image sources starting with one of those prefixes do not designate files
    # on the local filesystem, so they are neither uploaded nor rewritten.
    _REMOTE_PREFIXES: t.Tuple[str, ...] = ("http://", "https://", "//", "data:")

    def __init__(
        self,
        html: str,
        source_path: t.Optional[t.Union[str, Path]],
        uploader: t.Optional[t.Callable] = None,
    ):
        self.html_in: str = html
        self.html_out: t.Optional[str] = None
        self.source_path = source_path
        self.uploader = uploader
        self.images_in: t.List[HTMLImage] = []
        self.images_local: t.List[HTMLImage] = []
        self.images_remote: t.List[HTMLImage] = []
        # The `src` values as written in the document, aligned 1:1 with
        # `images_local`. `resolve()` overwrites `src` on the local copies, so
        # the original spelling is kept here for `produce()` to search for.
        self._local_sources: t.List[str] = []

    def __str__(self):
        return (
            f"HTMLImageTranslator:\nin: {self.images_in}\nlocal: {self.images_local}\nremote: {self.images_remote}"
        )

    def discover(self) -> "HTMLImageTranslator":
        """
        Find all images in the document, and compute paths of the local ones.
        """
        self.scan().resolve()
        return self

    def process(self) -> "HTMLImageTranslator":
        """
        Upload the local images, and produce the rewritten document.
        """
        self.upload()
        self.produce()
        return self

    def scan(self) -> "HTMLImageTranslator":
        """
        Scan input HTML for all <img ...> tags.
        """
        soup = BeautifulSoup(self.html_in, features="html.parser")
        self.images_in = []
        for image in soup.find_all(name="img"):
            src = image.get("src")
            # An image without `src` can neither be uploaded nor rewritten.
            if not src:
                continue
            self.images_in.append(HTMLImage(src=src, alt=image.get("alt")))
        return self

    def resolve(self) -> "HTMLImageTranslator":
        """
        Process discovered image elements, computing effective paths.
        """
        self.images_local = []
        self._local_sources = []
        if self.source_path is None:
            return self
        parent_path = Path(self.source_path)
        if parent_path.is_file():
            parent_path = parent_path.parent
        for image in self.images_in:
            if not image.src or image.src.lower().startswith(self._REMOTE_PREFIXES):
                continue
            image_new = deepcopy(image)
            # Use absolute paths 1:1.
            # Relative paths are relative to the original document.
            if not image.src.startswith("/"):
                image_new.src = str(parent_path / image.src)
            self.images_local.append(image_new)
            self._local_sources.append(image.src)
        return self

    def upload(self) -> "HTMLImageTranslator":
        """
        Upload images to HubSpot API, and store URLs.
        """
        # Start over, so that a repeated run does not accumulate stale entries.
        self.images_remote = []
        if self.uploader is None:
            logger.warning("No upload without uploader")
            return self
        # Remember URLs per file, to upload a file referenced multiple times only once.
        uploaded: t.Dict[str, str] = {}
        for image_local in self.images_local:
            if image_local.src not in uploaded:
                hs_file = self.uploader(source=image_local.src, name=Path(image_local.src).name)
                uploaded[image_local.src] = hs_file.url
            image_remote: HTMLImage = deepcopy(image_local)
            image_remote.src = uploaded[image_local.src]
            self.images_remote.append(image_remote)
        return self

    def produce(self) -> "HTMLImageTranslator":
        """
        Produce HTML output, with all image references replaced by their remote targets.

        The replacement is textual: every occurrence of a local image's
        original `src` value within the document is substituted.
        """
        # Pair each remote image with the original `src` of the *local* image it
        # was created from. Pairing with `images_in` would be off as soon as the
        # document also includes images which are already remote.
        replacements = {
            source: image_remote.src
            for source, image_remote in zip(self._local_sources, self.images_remote)
            if source and image_remote.src
        }
        if not replacements:
            self.html_out = self.html_in
            return self
        # Substitute in a single pass, so that text inserted by one replacement
        # is never matched again by another one. Longer sources go first, in
        # order not to be shadowed by a shorter source they start with.
        sources = sorted(replacements, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(source) for source in sources))
        self.html_out = pattern.sub(lambda match: replacements[match.group(0)], self.html_in)
        return self

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ dynamic = [
6363
]
6464

6565
dependencies = [
66+
"beautifulsoup4<5",
6667
"click<9",
6768
"click-aliases<2",
6869
"colorlog<7",

0 commit comments

Comments
 (0)