Skip to content

Commit 494dac8

Browse files
authored
Get scripts: fallback to scripts directory (#98)
1 parent 9b38fc6 commit 494dac8

File tree

1 file changed

+32
-9
lines changed

1 file changed

+32
-9
lines changed

sh_scrapy/crawl.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,18 @@
33
# Add them below so that any import errors are caught and sent to sentry
44
# -----------------------------------------------------------------------
55
from __future__ import print_function
6+
import datetime
7+
import logging
68
import os
7-
import sys
89
import socket
9-
import logging
10-
import datetime
10+
import sys
11+
import sysconfig
1112
import warnings
1213
from contextlib import contextmanager
14+
from importlib.metadata import PathDistribution
15+
from pathlib import Path
16+
from typing import Tuple
17+
1318
# XXX: Do not use atexit to close Hubstorage client!
1419
# why: functions registered with atexit are called when run_script() finishes,
1520
# and at that point the main() function hasn't completed, leading to lost log
@@ -151,20 +156,38 @@ def get_distribution():
151156
d.run_script(scriptname, ns)
152157

153158

154-
def _run_script(dist, script_name, namespace):
159+
def _get_script_code_and_path(dist: PathDistribution, script_name: str) -> Tuple[str, str]:
    """Get the code and absolute path of a script from the distribution metadata.

    If not found in the distribution, look for it in the scripts directory
    reported by ``sysconfig.get_path("scripts")``.

    :param dist: distribution whose ``scripts/`` metadata entry is tried first.
    :param script_name: bare file name of the script (no directory part).
    :return: ``(source, path)`` of the first location that holds the script,
        or ``(None, None)`` when neither location does.
    """
    script = "scripts/" + script_name
    source = dist.read_text(script)
    # A falsy result (nothing read, or an empty file) sends us to the fallback.
    if source:
        # dist._path is private API, but the path is only used for diagnostics.
        script_filename = dist._path.joinpath(script)
        return source, str(script_filename)

    # fallback: find script in the scripts directory
    script_path = Path(sysconfig.get_path("scripts")) / script_name
    # is_file() rather than exists(): a directory (e.g. the scripts directory
    # itself when script_name is empty) must count as "not found" instead of
    # making read_text() raise.
    if script_path.is_file():
        # Python source is UTF-8 by default; do not depend on the locale encoding.
        source = script_path.read_text(encoding="utf-8")
        return source, str(script_path.absolute())

    return None, None
177+
178+
179+
def _run_script(dist: PathDistribution, script_name: str, namespace: dict) -> None:
    """Compile and execute a script belonging to ``dist`` inside ``namespace``.

    :param dist: distribution the script is looked up for.
    :param script_name: bare file name of the script.
    :param namespace: dict used as both globals and locals for the executed code.
    :raises ValueError: if ``_get_script_code_and_path`` cannot find the script.
    """
    # An importlib-based replacement for pkg_resources.NullProvider.run_script().
    # It's possible that this doesn't support all cases that pkg_resources does,
    # so it may need to be improved when those are discovered.
    # Using a private attribute (dist._path) seems to be necessary to get the
    # full file path, but it's only needed for diagnostic messages so it should
    # be easy to fix this by moving to relative paths if this API is removed.
    source, script_filename = _get_script_code_and_path(dist, script_name)
    if source is None:
        # The lookup covers both the distribution metadata and the scripts
        # directory, so the message names both places.
        raise ValueError(
            f"Script {script_name!r} not found in metadata at {dist._path!r}"
            " or in the scripts directory"
        )
    # The path is passed to compile() only so tracebacks show the script file.
    code = compile(source, script_filename, "exec")
    exec(code, namespace, namespace)
170193

0 commit comments

Comments
 (0)