1212with a temporary directory and install in there
1313"""
1414
15+ import importlib
1516import logging
1617import os
1718import site
@@ -48,6 +49,27 @@ def run_command(command: list[str]) -> None:
4849 raise ValueError (msg )
4950
5051
def new_venv() -> tempfile.TemporaryDirectory:
    """Create a temporary directory with a venv inside and put it on sys.path.

    1. Creates a new temporary directory with a venv inside.
    2. Extends sys.path so that packages in that venv can be imported.

    Returns:
        The ``tempfile.TemporaryDirectory`` that owns the venv. The caller
        is responsible for calling ``cleanup()`` on it (and for removing the
        added entries from ``sys.path``) once the venv is no longer needed.

    Raises:
        Any exception raised by ``run_command`` or ``site.getsitepackages``.
        In that case the temporary directory is removed before the
        exception propagates, and ``sys.path`` is left untouched.
    """
    logger.debug("creating a new venv")
    td = tempfile.TemporaryDirectory(prefix="cascade_runner_venv_")
    try:
        # NOTE we create a venv instead of just plain directory, because some of the packages create files
        # outside of site-packages. Thus we then install with --prefix, not with --target
        run_command(Commands.venv_command(td.name))

        # NOTE not sure if getsitepackages was intended for this -- if issues, attempt replacing
        # with something like f"{td.name}/lib/python*/site-packages" + globbing
        extra_sp = site.getsitepackages(prefixes=[td.name])
    except BaseException:
        # Do not leave a half-built venv on disk when setup fails; without this
        # the directory would only disappear whenever the object gets finalized.
        td.cleanup()
        raise

    # NOTE this makes the explicit packages go first, in case of a different version
    logger.debug(f"extending sys.path with {extra_sp} ")
    sys.path = extra_sp + sys.path
    logger.debug(f"new sys.path: {sys.path} ")

    return td
71+
72+
5173class PackagesEnv (AbstractContextManager ):
5274 def __init__ (self ) -> None :
5375 self .td : tempfile .TemporaryDirectory | None = None
@@ -56,11 +78,7 @@ def extend(self, packages: list[str]) -> None:
5678 if not packages :
5779 return
5880 if self .td is None :
59- self .td = tempfile .TemporaryDirectory ()
60- logger .debug (f"creating a new venv at { self .td .name } " )
61- run_command (Commands .venv_command (self .td .name ))
62- # NOTE we create a venv instead of just plain directory, because some of the packages create files
63- # outside of site-packages. Thus we then install with --prefix, not with --target
81+ self .td = new_venv ()
6482
6583 logger .debug (
6684 f"installing { len (packages )} packages: { ',' .join (packages [:3 ])} { ',...' if len (packages ) > 3 else '' } "
@@ -71,14 +89,21 @@ def extend(self, packages: list[str]) -> None:
7189 if cache_dir := os .environ .get ("VENV_CACHE" , "" ):
7290 install_command += ["--cache-dir" , cache_dir ]
7391 install_command .extend (set (packages ))
92+ logger .debug (f"running install command: { ' ' .join (install_command )} " )
7493 run_command (install_command )
75- # NOTE not sure if getsitepackages was intended for this -- if issues, attempt replacing
76- # with something like f"{self.td.name}/lib/python*/site-packages" + globbing
77- extra_sp = site .getsitepackages (prefixes = [self .td .name ])
78- # NOTE this makes the explicit packages go first, in case of a different version
79- sys .path = extra_sp + sys .path
94+
95+ # NOTE we need this due to namespace packages:
96+ # 1. task 1 installs ns.pkg1 in its venv
97+ # 2. task 1 finishes, task 2 starts on the same worker
98+ # 3. task 2 starts, installs ns.pkg2. However, importlib is in a state that ns is aware only of pkg1 submod
99+ # Additionally, the caches are invalid anyway, because task 1's venv is already deleted
100+ importlib .invalidate_caches ()
101+ # TODO some namespace packages may require a reimport because they dynamically build `__all__` -- eg earthkit
80102
81103 def __exit__ (self , exc_type , exc_val , exc_tb ) -> Literal [False ]:
104+ sys .path = [
105+ p for p in sys .path if self .td is None or not p .startswith (self .td .name )
106+ ]
82107 if self .td is not None :
83108 self .td .cleanup ()
84109 return False
0 commit comments