Skip to content

Commit 1872787

Browse files
authored
Python: Refactor preloadDynamicLibs to improve readability (#5340)
We only call preloadDynamicLibs in one place directly before maybeRestoreSnapshot() so I rolled it into maybeRestoreSnapshot. I sequestered the 0.26.0a2 logic into a separate function preloadDynamicLibs026() which should make it easier to avoid changing 0.26.0a2 when improving the newer versions.
1 parent 259775e commit 1872787

File tree

4 files changed

+74
-55
lines changed

4 files changed

+74
-55
lines changed

src/pyodide/internal/python.ts

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import {
66
import {
77
maybeCollectSnapshot,
88
maybeRestoreSnapshot,
9-
preloadDynamicLibs,
109
finalizeBootstrap,
1110
isRestoringSnapshot,
1211
} from 'pyodide-internal:snapshot';
@@ -43,12 +42,6 @@ function prepareWasmLinearMemory(
4342
Module: Module,
4443
pyodide_entrypoint_helper: PyodideEntrypointHelper
4544
): void {
46-
enterJaegerSpan('preload_dynamic_libs', () => {
47-
preloadDynamicLibs(Module);
48-
});
49-
enterJaegerSpan('remove_run_dependency', () => {
50-
Module.removeRunDependency('dynlibs');
51-
});
5245
maybeRestoreSnapshot(Module);
5346
// entropyAfterRuntimeInit adjusts JS state ==> always needs to be called.
5447
entropyAfterRuntimeInit(Module);

src/pyodide/internal/setupPackages.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { parseTarInfo } from 'pyodide-internal:tar';
22
import { createMetadataFS } from 'pyodide-internal:metadatafs';
33
import { LOCKFILE } from 'pyodide-internal:metadata';
44
import {
5+
invalidateCaches,
56
PythonRuntimeError,
67
PythonUserError,
78
simpleRunPython,
@@ -151,6 +152,7 @@ class VirtualizedDir {
151152
return this.dynlibTarFs;
152153
}
153154

155+
/** Only used for Pyodide 0.26.0a2 */
154156
getSoFilesToLoad(): FilePath[] {
155157
return this.soFiles;
156158
}
@@ -223,10 +225,7 @@ export function mountWorkerFiles(Module: Module): void {
223225
Module.FS.mkdirTree('/session/metadata');
224226
const mdFS = createMetadataFS(Module);
225227
Module.FS.mount(mdFS, {}, '/session/metadata');
226-
simpleRunPython(
227-
Module,
228-
`from importlib import invalidate_caches; invalidate_caches(); del invalidate_caches`
229-
);
228+
invalidateCaches(Module);
230229
}
231230

232231
/**

src/pyodide/internal/snapshot.ts

Lines changed: 64 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import {
1717
IS_SECOND_VALIDATION_PHASE,
1818
} from 'pyodide-internal:metadata';
1919
import {
20+
invalidateCaches,
2021
PythonRuntimeError,
2122
PythonUserError,
2223
simpleRunPython,
@@ -302,55 +303,40 @@ function loadDynlibFromVendor(
302303
}
303304

304305
/**
305-
* This loads all dynamic libraries visible in the site-packages directory. They
306-
* are loaded before the runtime is initialized outside of the heap, using the
307-
* same mechanism for DT_NEEDED libs (i.e., the libs that are loaded before the
308-
* program starts because you passed them as linker args).
309-
*
310-
* Currently, we pessimistically preload all libs. It would be nice to only load
311-
* the ones that are used. I am pretty sure we can manage this by reserving a
312-
* separate shared lib metadata arena at startup and allocating shared libs
313-
* there.
306+
* Preloading for the legacy 0.26 version. This loads all dynamic libraries
307+
* visible in the site-packages directory. They are loaded before the runtime is
308+
* initialized outside of the heap, using the same mechanism for DT_NEEDED libs
309+
* (i.e., the libs that are loaded before the program starts because you passed
310+
* them as linker args).
314311
*/
315-
export function preloadDynamicLibs(Module: Module): void {
316-
Module.noInitialRun = isRestoringSnapshot();
317-
Module.getMemoryPatched = getMemoryPatched;
318-
Module.growMemory(LOADED_SNAPSHOT_META?.snapshotSize ?? 0);
312+
function preloadDynamicLibs026(Module: Module): void {
319313
const sitePackages = Module.FS.sessionSitePackages + '/';
320314
const sitePackagesRoot = VIRTUALIZED_DIR.getSitePackagesRoot();
321-
if (Module.API.version === '0.26.0a2') {
322-
const loadedBaselineSnapshot =
323-
LOADED_SNAPSHOT_META?.settings?.baselineSnapshot;
324-
let SO_FILES_TO_LOAD: string[][];
325-
if (IS_CREATING_BASELINE_SNAPSHOT || loadedBaselineSnapshot) {
326-
SO_FILES_TO_LOAD = [['_lzma.so'], ['_ssl.so']];
327-
} else {
328-
SO_FILES_TO_LOAD = sortSoFiles(VIRTUALIZED_DIR.getSoFilesToLoad());
329-
}
330-
for (const soFile of SO_FILES_TO_LOAD) {
331-
loadDynlibFromTarFs(Module, sitePackages, sitePackagesRoot, soFile);
332-
}
333-
return;
315+
const loadedBaselineSnapshot =
316+
LOADED_SNAPSHOT_META?.settings?.baselineSnapshot;
317+
let SO_FILES_TO_LOAD: string[][];
318+
if (IS_CREATING_BASELINE_SNAPSHOT || loadedBaselineSnapshot) {
319+
SO_FILES_TO_LOAD = [['_lzma.so'], ['_ssl.so']];
320+
} else {
321+
SO_FILES_TO_LOAD = sortSoFiles(VIRTUALIZED_DIR.getSoFilesToLoad());
334322
}
335-
if (!LOADED_SNAPSHOT_META?.loadOrder) {
336-
return;
323+
for (const soFile of SO_FILES_TO_LOAD) {
324+
loadDynlibFromTarFs(Module, sitePackages, sitePackagesRoot, soFile);
337325
}
338-
// In Pyodide 0.28 we switched from using top level EM_JS to initialize the CountArgs function
339-
// pointer to using an initializer to work around a regression in Emscripten 4.0.3 and 4.0.4. We
340-
// could drop this patch because we are now on Emscripten 4.0.9.
341-
// https://github.com/pyodide/pyodide/blob/main/cpython/patches/0008-Fix-Emscripten-call-trampoline-compatibility-with-Em.patch
342-
//
343-
// Unfortunately, this initializer allocates a function table slot and is called before dynamic
344-
// loading when taking the snapshot but after when restoring the snapshot. Thus, when restoring a
345-
// snapshot, before loading dynamic libraries we reserve a function pointer for the
346-
// CountArgsPointer and after loading dynamic libraries, we put it in the free list so it will be
347-
// used at the right moment.
348-
const PyEMCountArgsPtr = Module.getEmptyTableSlot();
326+
}
349327

328+
/**
329+
* If we're restoring from a snapshot, we need to preload dynamic libraries so that any function
330+
* pointers that point into the dylib symbols work correctly.
331+
* Load the dynamic libraries in loadOrder. Mostly logic dealing with paths.
332+
*/
333+
function preloadDynamicLibsMain(Module: Module, loadOrder: string[]): void {
334+
const sitePackages = Module.FS.sessionSitePackages + '/';
335+
const sitePackagesRoot = VIRTUALIZED_DIR.getSitePackagesRoot();
350336
const dynlibRoot = VIRTUALIZED_DIR.getDynlibRoot();
351337
const dynlibPath = '/usr/lib/';
352338
const userBundleNames = MetadataReader.getNames();
353-
for (let path of LOADED_SNAPSHOT_META.loadOrder) {
339+
for (let path of loadOrder) {
354340
let root = sitePackagesRoot;
355341
let base = '';
356342
if (path.startsWith(sitePackages)) {
@@ -375,6 +361,32 @@ export function preloadDynamicLibs(Module: Module): void {
375361
loadDynlibFromTarFs(Module, base, root, pathSplit);
376362
}
377363
}
364+
}
365+
366+
function preloadDynamicLibs(Module: Module): void {
367+
if (Module.API.version === '0.26.0a2') {
368+
// In 0.26.0a2 we need to preload dynamic libraries even if we aren't restoring a snapshot.
369+
preloadDynamicLibs026(Module);
370+
return;
371+
}
372+
//
373+
const loadOrder = LOADED_SNAPSHOT_META?.loadOrder;
374+
if (!loadOrder) {
375+
// In newer versions we only need to do the preloading if there is a snapshot to restore.
376+
return;
377+
}
378+
// In Pyodide 0.28 we switched from using top level EM_JS to initialize the CountArgs function
379+
// pointer to using an initializer to work around a regression in Emscripten 4.0.3 and 4.0.4. We
380+
// could drop this patch because we are now on Emscripten 4.0.9.
381+
// https://github.com/pyodide/pyodide/blob/main/cpython/patches/0008-Fix-Emscripten-call-trampoline-compatibility-with-Em.patch
382+
//
383+
// Unfortunately, this initializer allocates a function table slot and is called before dynamic
384+
// loading when taking the snapshot but after when restoring the snapshot. Thus, when restoring a
385+
// snapshot, before loading dynamic libraries we reserve a function pointer for the
386+
// CountArgsPointer and after loading dynamic libraries, we put it in the free list so it will be
387+
// used at the right moment.
388+
const PyEMCountArgsPtr = Module.getEmptyTableSlot();
389+
preloadDynamicLibsMain(Module, loadOrder);
378390
Module.freeTableIndexes.push(PyEMCountArgsPtr);
379391
}
380392

@@ -691,6 +703,17 @@ function checkSnapshotType(snapshotType: string): void {
691703
}
692704

693705
export function maybeRestoreSnapshot(Module: Module): void {
706+
Module.noInitialRun = isRestoringSnapshot();
707+
Module.getMemoryPatched = getMemoryPatched;
708+
// Make sure memory is large enough
709+
Module.growMemory(LOADED_SNAPSHOT_META?.snapshotSize ?? 0);
710+
enterJaegerSpan('preload_dynamic_libs', () => {
711+
preloadDynamicLibs(Module);
712+
});
713+
// TODO: Remove the jaeger span here
714+
enterJaegerSpan('remove_run_dependency', () => {
715+
Module.removeRunDependency('dynlibs');
716+
});
694717
if (!LOADED_SNAPSHOT_META) {
695718
return;
696719
}
@@ -703,10 +726,7 @@ export function maybeRestoreSnapshot(Module: Module): void {
703726
snapshotReader.disposeMemorySnapshot();
704727
// Invalidate caches if we have a snapshot because the contents of site-packages
705728
// may have changed.
706-
simpleRunPython(
707-
Module,
708-
'from importlib import invalidate_caches as f; f(); del f'
709-
);
729+
invalidateCaches(Module);
710730
}
711731

712732
function collectSnapshot(

src/pyodide/internal/util.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,10 @@ export function simpleRunPython(
6969
}
7070
return cause;
7171
}
72+
73+
export function invalidateCaches(Module: Module): void {
74+
simpleRunPython(
75+
Module,
76+
`from importlib import invalidate_caches; invalidate_caches(); del invalidate_caches`
77+
);
78+
}

0 commit comments

Comments
 (0)