@@ -17,6 +17,7 @@ import {
1717 IS_SECOND_VALIDATION_PHASE ,
1818} from 'pyodide-internal:metadata' ;
1919import {
20+ invalidateCaches ,
2021 PythonRuntimeError ,
2122 PythonUserError ,
2223 simpleRunPython ,
@@ -302,55 +303,40 @@ function loadDynlibFromVendor(
302303}
303304
304305/**
305- * This loads all dynamic libraries visible in the site-packages directory. They
306- * are loaded before the runtime is initialized outside of the heap, using the
307- * same mechanism for DT_NEEDED libs (i.e., the libs that are loaded before the
308- * program starts because you passed them as linker args).
309- *
310- * Currently, we pessimistically preload all libs. It would be nice to only load
311- * the ones that are used. I am pretty sure we can manage this by reserving a
312- * separate shared lib metadata arena at startup and allocating shared libs
313- * there.
306+ * Preloading for the legacy 0.26 version. This loads all dynamic libraries
307+ * visible in the site-packages directory. They are loaded before the runtime is
308+ * initialized outside of the heap, using the same mechanism for DT_NEEDED libs
309+ * (i.e., the libs that are loaded before the program starts because you passed
310+ * them as linker args).
314311 */
315- export function preloadDynamicLibs ( Module : Module ) : void {
316- Module . noInitialRun = isRestoringSnapshot ( ) ;
317- Module . getMemoryPatched = getMemoryPatched ;
318- Module . growMemory ( LOADED_SNAPSHOT_META ?. snapshotSize ?? 0 ) ;
312+ function preloadDynamicLibs026 ( Module : Module ) : void {
319313 const sitePackages = Module . FS . sessionSitePackages + '/' ;
320314 const sitePackagesRoot = VIRTUALIZED_DIR . getSitePackagesRoot ( ) ;
321- if ( Module . API . version === '0.26.0a2' ) {
322- const loadedBaselineSnapshot =
323- LOADED_SNAPSHOT_META ?. settings ?. baselineSnapshot ;
324- let SO_FILES_TO_LOAD : string [ ] [ ] ;
325- if ( IS_CREATING_BASELINE_SNAPSHOT || loadedBaselineSnapshot ) {
326- SO_FILES_TO_LOAD = [ [ '_lzma.so' ] , [ '_ssl.so' ] ] ;
327- } else {
328- SO_FILES_TO_LOAD = sortSoFiles ( VIRTUALIZED_DIR . getSoFilesToLoad ( ) ) ;
329- }
330- for ( const soFile of SO_FILES_TO_LOAD ) {
331- loadDynlibFromTarFs ( Module , sitePackages , sitePackagesRoot , soFile ) ;
332- }
333- return ;
315+ const loadedBaselineSnapshot =
316+ LOADED_SNAPSHOT_META ?. settings ?. baselineSnapshot ;
317+ let SO_FILES_TO_LOAD : string [ ] [ ] ;
318+ if ( IS_CREATING_BASELINE_SNAPSHOT || loadedBaselineSnapshot ) {
319+ SO_FILES_TO_LOAD = [ [ '_lzma.so' ] , [ '_ssl.so' ] ] ;
320+ } else {
321+ SO_FILES_TO_LOAD = sortSoFiles ( VIRTUALIZED_DIR . getSoFilesToLoad ( ) ) ;
334322 }
335- if ( ! LOADED_SNAPSHOT_META ?. loadOrder ) {
336- return ;
323+ for ( const soFile of SO_FILES_TO_LOAD ) {
324+ loadDynlibFromTarFs ( Module , sitePackages , sitePackagesRoot , soFile ) ;
337325 }
338- // In Pyodide 0.28 we switched from using top level EM_JS to initialize the CountArgs function
339- // pointer to using an initializer to work around a regression in Emscripten 4.0.3 and 4.0.4. We
340- // could drop this patch because we are now on Emscripten 4.0.9.
341- // https://github.com/pyodide/pyodide/blob/main/cpython/patches/0008-Fix-Emscripten-call-trampoline-compatibility-with-Em.patch
342- //
343- // Unfortunately, this initializer allocates a function table slot and is called before dynamic
344- // loading when taking the snapshot but after when restoring the snapshot. Thus, when restoring a
345- // snapshot, before loading dynamic libraries we reserve a function pointer for the
346- // CountArgsPointer and after loading dynamic libraries, we put it in the free list so it will be
347- // used at the right moment.
348- const PyEMCountArgsPtr = Module . getEmptyTableSlot ( ) ;
326+ }
349327
328+ /**
329+ * If we're restoring from a snapshot, we need to preload dynamic libraries so that any function
330+ * pointers that point into the dylib symbols work correctly.
331+ * Load the dynamic libraries in loadOrder. Mostly logic dealing with paths.
332+ */
333+ function preloadDynamicLibsMain ( Module : Module , loadOrder : string [ ] ) : void {
334+ const sitePackages = Module . FS . sessionSitePackages + '/' ;
335+ const sitePackagesRoot = VIRTUALIZED_DIR . getSitePackagesRoot ( ) ;
350336 const dynlibRoot = VIRTUALIZED_DIR . getDynlibRoot ( ) ;
351337 const dynlibPath = '/usr/lib/' ;
352338 const userBundleNames = MetadataReader . getNames ( ) ;
353- for ( let path of LOADED_SNAPSHOT_META . loadOrder ) {
339+ for ( let path of loadOrder ) {
354340 let root = sitePackagesRoot ;
355341 let base = '' ;
356342 if ( path . startsWith ( sitePackages ) ) {
@@ -375,6 +361,32 @@ export function preloadDynamicLibs(Module: Module): void {
375361 loadDynlibFromTarFs ( Module , base , root , pathSplit ) ;
376362 }
377363 }
364+ }
365+
366+ function preloadDynamicLibs ( Module : Module ) : void {
367+ if ( Module . API . version === '0.26.0a2' ) {
368+ // In 0.26.0a2 we need to preload dynamic libraries even if we aren't restoring a snapshot.
369+ preloadDynamicLibs026 ( Module ) ;
370+ return ;
371+ }
372+ //
373+ const loadOrder = LOADED_SNAPSHOT_META ?. loadOrder ;
374+ if ( ! loadOrder ) {
375+ // In newer versions we only need to do the preloading if there is a snapshot to restore.
376+ return ;
377+ }
378+ // In Pyodide 0.28 we switched from using top level EM_JS to initialize the CountArgs function
379+ // pointer to using an initializer to work around a regression in Emscripten 4.0.3 and 4.0.4. We
380+ // could drop this patch because we are now on Emscripten 4.0.9.
381+ // https://github.com/pyodide/pyodide/blob/main/cpython/patches/0008-Fix-Emscripten-call-trampoline-compatibility-with-Em.patch
382+ //
383+ // Unfortunately, this initializer allocates a function table slot and is called before dynamic
384+ // loading when taking the snapshot but after when restoring the snapshot. Thus, when restoring a
385+ // snapshot, before loading dynamic libraries we reserve a function pointer for the
386+ // CountArgsPointer and after loading dynamic libraries, we put it in the free list so it will be
387+ // used at the right moment.
388+ const PyEMCountArgsPtr = Module . getEmptyTableSlot ( ) ;
389+ preloadDynamicLibsMain ( Module , loadOrder ) ;
378390 Module . freeTableIndexes . push ( PyEMCountArgsPtr ) ;
379391}
380392
@@ -691,6 +703,17 @@ function checkSnapshotType(snapshotType: string): void {
691703}
692704
693705export function maybeRestoreSnapshot ( Module : Module ) : void {
706+ Module . noInitialRun = isRestoringSnapshot ( ) ;
707+ Module . getMemoryPatched = getMemoryPatched ;
708+ // Make sure memory is large enough
709+ Module . growMemory ( LOADED_SNAPSHOT_META ?. snapshotSize ?? 0 ) ;
710+ enterJaegerSpan ( 'preload_dynamic_libs' , ( ) => {
711+ preloadDynamicLibs ( Module ) ;
712+ } ) ;
713+ // TODO: Remove the jaeger span here
714+ enterJaegerSpan ( 'remove_run_dependency' , ( ) => {
715+ Module . removeRunDependency ( 'dynlibs' ) ;
716+ } ) ;
694717 if ( ! LOADED_SNAPSHOT_META ) {
695718 return ;
696719 }
@@ -703,10 +726,7 @@ export function maybeRestoreSnapshot(Module: Module): void {
703726 snapshotReader . disposeMemorySnapshot ( ) ;
704727 // Invalidate caches if we have a snapshot because the contents of site-packages
705728 // may have changed.
706- simpleRunPython (
707- Module ,
708- 'from importlib import invalidate_caches as f; f(); del f'
709- ) ;
729+ invalidateCaches ( Module ) ;
710730}
711731
712732function collectSnapshot (
0 commit comments