Skip to content

Commit 5c642d0

Browse files
committed
Use pathlibfs for scheme-agnostic source access
1 parent 99153f1 commit 5c642d0

File tree

7 files changed

+93
-28
lines changed

7 files changed

+93
-28
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33

44
## Unreleased
5+
- Use `pathlibfs` for scheme-agnostic source access
56

67
## 2023-10-07 0.1.0
78
- Add example data files in different formats

hubspot_tech_writing/core.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from hubspot_tech_writing.hubspot_api import HubSpotAdapter, HubSpotBlogPost, HubSpotFile
1515
from hubspot_tech_writing.util.common import ContentTypeResolver
1616
from hubspot_tech_writing.util.html import HTMLImageTranslator
17-
from hubspot_tech_writing.util.io import to_io
17+
from hubspot_tech_writing.util.io import path_from_url, to_io
1818

1919
logger = logging.getLogger(__name__)
2020

@@ -75,9 +75,14 @@ def upload(
7575
folder_id: t.Optional[str] = None,
7676
folder_path: t.Optional[str] = None,
7777
):
78-
source_path = Path(source)
78+
source_path: Path
79+
if isinstance(source, str):
80+
source_path = path_from_url(source)
81+
else:
82+
source_path = source
83+
logger.info(f"Source: {source_path}")
7984

80-
ctr = ContentTypeResolver(name=source_path)
85+
ctr = ContentTypeResolver(filepath=source_path)
8186

8287
logger.info(f"Uploading file: {source}")
8388
hsa = HubSpotAdapter(access_token=access_token)
@@ -101,6 +106,7 @@ def upload(
101106
)
102107
hit = HTMLImageTranslator(html=html, source_path=source_path, uploader=uploader)
103108
hit.discover().process()
109+
logger.debug(hit)
104110
html = hit.html_out
105111

106112
# Upload blog post.

hubspot_tech_writing/hubspot_api.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
import os
44
import typing as t
55
from copy import deepcopy
6-
from pathlib import Path
6+
from tempfile import NamedTemporaryFile
77

88
import hubspot
99
from click import confirm
1010
from hubspot import HubSpot
1111
from hubspot.cms.blogs.blog_posts import BlogPost
1212
from hubspot.files.files import File
13+
from pathlibfs import Path
1314

1415
logger = logging.getLogger(__name__)
1516

@@ -130,12 +131,14 @@ def get_file_by_name(self, file: "HubSpotFile") -> File:
130131
logger.info(f"Found file: id={result.id}, path={result.path}, url={result.url}")
131132
return result
132133

133-
def save_file(self, file_id: str, source: str):
134+
def save_file(self, file_id: str, source: Path):
134135
"""
135136
Save / overwrite existing file.
136137
"""
138+
tmpfile = NamedTemporaryFile()
139+
tmpfile.write(source.read_bytes())
137140
return self.hs.files.files.files_api.replace(
138-
file_id=file_id, file=source, options=json.dumps(self.FILE_OPTIONS)
141+
file_id=file_id, file=tmpfile.name, options=json.dumps(self.FILE_OPTIONS)
139142
)
140143

141144
def delete_file_by_id(self, identifier: str) -> t.Optional[File]:
@@ -254,7 +257,7 @@ class HubSpotFile:
254257
def __init__(
255258
self,
256259
hubspot_adapter: HubSpotAdapter,
257-
source: t.Union[str, Path],
260+
source: Path,
258261
identifier: t.Optional[str] = None,
259262
name: t.Optional[str] = None,
260263
folder_id: t.Optional[str] = None,
@@ -286,7 +289,7 @@ def __init__(
286289
def __str__(self):
287290
return (
288291
f"{self.__class__.__name__} identifier={self.identifier}, "
289-
f"name={self.name}, folder={self.folder_id or self.folder_path}"
292+
f"name={self.name}, folder={self.folder_id or self.folder_path}, source={self.source}"
290293
)
291294

292295
def load(self):
@@ -310,7 +313,7 @@ def save(self):
310313
if not self.source:
311314
raise ValueError(f"Unable to save file without source: {self}")
312315
logger.info(f"Saving file: {self}")
313-
return self.hsa.save_file(file_id=self.identifier, source=str(self.source))
316+
return self.hsa.save_file(file_id=self.identifier, source=self.source)
314317

315318
def delete(self):
316319
"""

hubspot_tech_writing/util/common.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import logging
22
import typing as t
3-
from pathlib import Path
43

54
import colorlog
65
from colorlog.escape_codes import escape_codes
6+
from pathlibfs import Path
7+
from yarl import URL
78

89

910
def setup_logging(level=logging.INFO, verbose: bool = False):
@@ -23,9 +24,12 @@ class ContentTypeResolver:
2324
HTML_SUFFIXES = [".html", ".html5", ".htm"]
2425
TEXT_SUFFIXES = MARKUP_SUFFIXES + HTML_SUFFIXES + [".txt"]
2526

26-
def __init__(self, name: t.Union[str, Path]):
27-
self.name = name
28-
self.suffix = Path(name).suffix
27+
def __init__(self, filepath: t.Union[str, Path]):
28+
self.url = URL(str(filepath))
29+
if self.url.is_absolute():
30+
self.url = self.url.with_scheme("")
31+
self.path = Path(str(self.url))
32+
self.suffix = self.path.suffix
2933

3034
def is_markup(self):
3135
return self.suffix in self.MARKUP_SUFFIXES
@@ -38,3 +42,8 @@ def is_text(self):
3842

3943
def is_file(self):
4044
return not self.is_text()
45+
46+
47+
def url_to_path(filepath: str):
48+
url = URL(str(filepath)).with_scheme("")
49+
return Path(str(url))

hubspot_tech_writing/util/html.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,18 @@
22
import logging
33
import typing as t
44
from copy import deepcopy
5-
from pathlib import Path
5+
from pprint import pformat
66

77
from bs4 import BeautifulSoup
8+
from pathlibfs import Path
89

910
logger = logging.getLogger(__name__)
1011

1112

1213
@dataclasses.dataclass
1314
class HTMLImage:
1415
alt: str
15-
src: str
16+
src: Path
1617

1718

1819
class HTMLImageTranslator:
@@ -21,19 +22,18 @@ class HTMLImageTranslator:
2122
After that, replace URLs in HTML document.
2223
"""
2324

24-
def __init__(self, html: str, source_path: t.Union[str, Path], uploader: t.Optional[t.Callable] = None):
25+
def __init__(self, html: str, source_path: Path, uploader: t.Optional[t.Callable] = None):
2526
self.html_in: str = html
2627
self.html_out: t.Optional[str] = None
27-
self.source_path = source_path
28+
self.source = source_path
2829
self.uploader = uploader
2930
self.images_in: t.List[HTMLImage] = []
3031
self.images_local: t.List[HTMLImage] = []
3132
self.images_remote: t.List[HTMLImage] = []
3233

3334
def __str__(self):
34-
return (
35-
f"HTMLImageTranslator:\nin: {self.images_in}\nlocal: {self.images_local}\nremote: {self.images_remote}"
36-
)
35+
info = {"source": self.source, "in": self.images_in, "local": self.images_local, "remote": self.images_remote}
36+
return f"HTMLImageTranslator:\n{pformat(info)}"
3737

3838
def discover(self):
3939
self.scan().resolve()
@@ -59,9 +59,10 @@ def resolve(self) -> "HTMLImageTranslator":
5959
"""
6060
Process discovered image elements, computing effective paths.
6161
"""
62-
if self.source_path is None:
62+
if self.source is None:
63+
logger.warning("No resolving without source path")
6364
return self
64-
parent_path = Path(self.source_path)
65+
parent_path = self.source
6566
if parent_path.is_file():
6667
parent_path = parent_path.parent
6768
self.images_local = []
@@ -74,7 +75,7 @@ def resolve(self) -> "HTMLImageTranslator":
7475

7576
# Relative paths are relative to the original document.
7677
else:
77-
image_new.src = str(Path(parent_path) / image.src)
78+
image_new.src = parent_path / image.src
7879
self.images_local.append(image_new)
7980
return self
8081

@@ -86,7 +87,7 @@ def upload(self) -> "HTMLImageTranslator":
8687
logger.warning("No upload without uploader")
8788
return self
8889
for image_local in self.images_local:
89-
hs_file = self.uploader(source=image_local.src, name=Path(image_local.src).name)
90+
hs_file = self.uploader(source=image_local.src, name=image_local.src.name)
9091
image_url = hs_file.url
9192
image_remote: HTMLImage = deepcopy(image_local)
9293
image_remote.src = image_url

hubspot_tech_writing/util/io.py

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,63 @@
33
import typing as t
44
from pathlib import Path
55

6-
import requests
6+
from pathlibfs import Path as PathPlus
7+
from yarl import URL
78

89

910
@contextlib.contextmanager
1011
def to_io(source: t.Union[str, Path, t.IO]) -> t.Generator[t.IO, None, None]:
11-
if isinstance(source, (str, Path)):
12+
fp: t.IO
13+
if isinstance(source, io.TextIOWrapper):
14+
fp = source
15+
elif isinstance(source, (str, Path, PathPlus)):
1216
source = str(source)
13-
fp: t.IO
17+
path = path_from_url(source)
18+
fp = path.open(mode="rt")
19+
"""
1420
if source.startswith("http://") or source.startswith("https://"):
1521
response = requests.get(source, timeout=10.0)
1622
fp = io.StringIO(response.text)
1723
else:
1824
fp = open(source, "r")
25+
"""
1926
else:
20-
fp = source
27+
raise TypeError(f"Unable to converge to IO handle. type={type(source)}, value={source}")
2128
yield fp
2229
fp.close()
30+
31+
32+
def path_from_url(url: str) -> PathPlus:
33+
"""
34+
Convert GitHub HTTP URL to pathlibfs / fsspec URL.
35+
36+
Input URLs
37+
----------
38+
github+https://foobar:ghp_lalala@github.com/acme/sweet-camino/path/to/document.md
39+
github+https://foobar:ghp_lalala@github.com/acme/sweet-camino/blob/main/path/to/document.md
40+
41+
Output Path
42+
-----------
43+
fs = Path("github://path/to/document.md", username="foobar", token="ghp_lalala", org="acme", repo="sweet-camino")
44+
"""
45+
uri = URL(url)
46+
47+
if uri.scheme.startswith("github+https"):
48+
path_fragments = uri.path.split("/")[1:]
49+
path_kwargs = {
50+
"username": uri.user,
51+
"token": uri.password,
52+
"org": path_fragments[0],
53+
"repo": path_fragments[1],
54+
}
55+
56+
real_path_fragments = path_fragments[2:]
57+
if path_fragments[2] == "blob":
58+
real_path_fragments = path_fragments[4:]
59+
60+
downstream_url = "github://" + "/".join(real_path_fragments)
61+
path = PathPlus(downstream_url, **path_kwargs)
62+
63+
else:
64+
path = PathPlus(url)
65+
return path

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ dependencies = [
7070
"hubspot-api-client<9",
7171
"markdown<4",
7272
"mkdocs-linkcheck<2",
73+
"pathlibfs<0.6",
7374
"requests<3",
75+
"yarl<2",
7476
]
7577

7678
[project.optional-dependencies]

0 commit comments

Comments
 (0)