Skip to content

Commit 2c97595

Browse files
committed
Fixes to external package loading: use external lock and load stdlib.
1 parent 1c424b5 commit 2c97595

File tree

8 files changed

+89
-19
lines changed

8 files changed

+89
-19
lines changed

src/pyodide/internal/loadPackage.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
} from 'pyodide-internal:metadata';
1919
import {
2020
SITE_PACKAGES,
21+
STDLIB_PACKAGES,
2122
getSitePackagesPath,
2223
} from 'pyodide-internal:setupPackages';
2324
import { parseTarInfo } from 'pyodide-internal:tar';
@@ -113,7 +114,12 @@ async function loadPackagesImpl(
113114
let loadPromises: Promise<[string, Reader]>[] = [];
114115
let loading = [];
115116
for (const req of requirements) {
116-
if (SITE_PACKAGES.loadedRequirements.has(req)) continue;
117+
if (req == 'test') {
118+
continue; // Skip the test package, it is only useful for internal Python regression testing.
119+
}
120+
if (SITE_PACKAGES.loadedRequirements.has(req)) {
121+
continue;
122+
}
117123
loadPromises.push(loadBundle(req).then((r) => [req, r]));
118124
loading.push(req);
119125
}
@@ -135,9 +141,10 @@ async function loadPackagesImpl(
135141
}
136142

137143
export async function loadPackages(Module: Module, requirements: Set<string>) {
144+
const pkgsToLoad = requirements.union(new Set(STDLIB_PACKAGES));
138145
if (LOAD_WHEELS_FROM_R2) {
139-
await loadPackagesImpl(Module, requirements, loadBundleFromR2);
146+
await loadPackagesImpl(Module, pkgsToLoad, loadBundleFromR2);
140147
} else if (LOAD_WHEELS_FROM_ARTIFACT_BUNDLER) {
141-
await loadPackagesImpl(Module, requirements, loadBundleFromArtifactBundler);
148+
await loadPackagesImpl(Module, pkgsToLoad, loadBundleFromArtifactBundler);
142149
}
143150
}

src/pyodide/internal/metadata.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import { default as MetadataReader } from 'pyodide-internal:runtime-generated/metadata';
22
import { default as PYODIDE_BUCKET } from 'pyodide-internal:generated/pyodide-bucket.json';
3+
// The pyodide-lock.json is read from the Python bundle (pyodide-capnp-bin).
4+
import { default as PYODIDE_LOCK } from 'pyodide-internal:generated/pyodide-lock.json';
35
import { default as ArtifactBundler } from 'pyodide-internal:artifacts';
46

57
export const IS_WORKERD = MetadataReader.isWorkerd();
@@ -12,9 +14,7 @@ export const LOAD_WHEELS_FROM_R2: boolean = IS_WORKERD;
1214
export const LOAD_WHEELS_FROM_ARTIFACT_BUNDLER =
1315
MetadataReader.shouldUsePackagesInArtifactBundler();
1416
export const PACKAGES_VERSION = MetadataReader.getPackagesVersion();
15-
export const LOCKFILE: PackageLock = JSON.parse(
16-
MetadataReader.getPackagesLock()
17-
);
17+
export const LOCKFILE: PackageLock = PYODIDE_LOCK;
1818
export const REQUIREMENTS = MetadataReader.getRequirements();
1919
export const MAIN_MODULE_NAME = MetadataReader.getMainModule();
2020
export const MEMORY_SNAPSHOT_READER = MetadataReader.hasMemorySnapshot()

src/pyodide/internal/setupPackages.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,21 @@ function canonicalizePackageName(name: string): string {
2323
}
2424

2525
// The "name" field in the lockfile is not canonicalized
26-
const STDLIB_PACKAGES: string[] = Object.values(LOCKFILE.packages)
26+
export const STDLIB_PACKAGES: string[] = Object.values(LOCKFILE.packages)
2727
.filter(({ install_dir }) => install_dir === 'stdlib')
2828
.map(({ name }) => canonicalizePackageName(name));
2929

30+
// Each item in the list is an element of the file path, for example
31+
// `folder/file.txt` -> `["folder", "file.txt"]
32+
export type FilePath = string[];
33+
3034
/**
3135
* SitePackagesDir keeps track of the virtualized view of the site-packages
3236
* directory generated for each worker.
3337
*/
3438
class SitePackagesDir {
3539
public rootInfo: TarFSInfo;
36-
public soFiles: string[][];
40+
public soFiles: FilePath[];
3741
public loadedRequirements: Set<string>;
3842
constructor() {
3943
this.rootInfo = {
@@ -133,6 +137,7 @@ class SitePackagesDir {
133137
export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
134138
if (EmbeddedPackagesTarReader.read === undefined) {
135139
// Package retrieval is enabled, so the embedded tar reader isn't initialised.
140+
// All packages, including STDLIB_PACKAGES, are loaded in `loadPackages`.
136141
return new SitePackagesDir();
137142
}
138143

src/pyodide/internal/snapshot.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { default as ArtifactBundler } from 'pyodide-internal:artifacts';
22
import { default as UnsafeEval } from 'internal:unsafe-eval';
33
import { default as DiskCache } from 'pyodide-internal:disk_cache';
44
import {
5+
FilePath,
56
SITE_PACKAGES,
67
getSitePackagesPath,
78
} from 'pyodide-internal:setupPackages';
@@ -102,6 +103,46 @@ function loadDynlib(
102103
}
103104
}
104105

106+
/**
107+
* This function is used to ensure the order in which we load SO_FILES stays the same.
108+
*
109+
* The sort always puts _lzma.so and _ssl.so
110+
* first, because these SO_FILES are loaded in the baseline snapshot, and if we want to generate
111+
* a package snapshot while a baseline snapshot is loaded we need them to be first. The rest of the
112+
* files are sorted alphabetically.
113+
*
114+
* The `filePaths` list is of the form [["folder", "file.so"], ["file.so"]], so each element in it
115+
* is effectively a file path.
116+
*/
117+
function sortSoFiles(filePaths: FilePath[]): FilePath[] {
118+
const result = [];
119+
let hasLzma = false;
120+
let hasSsl = false;
121+
const lzmaFile = '_lzma.so';
122+
const sslFile = '_ssl.so';
123+
for (const path of filePaths) {
124+
if (path.length == 1 && path[0] == lzmaFile) {
125+
hasLzma = true;
126+
} else if (path.length == 1 && path[0] == sslFile) {
127+
hasSsl = true;
128+
} else {
129+
result.push(path);
130+
}
131+
}
132+
133+
// JS might handle sorting lists of lists fine, but I'd rather be explicit here and make it compare
134+
// strings.
135+
result.sort((a, b) => a.join('/').localeCompare(b.join('/')));
136+
if (hasSsl) {
137+
result.unshift([sslFile]);
138+
}
139+
if (hasLzma) {
140+
result.unshift([lzmaFile]);
141+
}
142+
143+
return result;
144+
}
145+
105146
// used for checkLoadedSoFiles a snapshot sanity check
106147
const PRELOADED_SO_FILES: string[] = [];
107148

@@ -121,6 +162,11 @@ export function preloadDynamicLibs(Module: Module): void {
121162
if (IS_CREATING_BASELINE_SNAPSHOT || LOADED_BASELINE_SNAPSHOT) {
122163
SO_FILES_TO_LOAD = [['_lzma.so'], ['_ssl.so']];
123164
}
165+
// The order in which we load the SO_FILES matters. For example, if a snapshot was generated with
166+
// SO_FILES loaded in a certain way, then if we load that snapshot and load the SO_FILES
167+
// differently here then Python will crash.
168+
SO_FILES_TO_LOAD = sortSoFiles(SO_FILES_TO_LOAD);
169+
124170
try {
125171
const sitePackages = getSitePackagesPath(Module);
126172
for (const soFile of SO_FILES_TO_LOAD) {

src/pyodide/types/pyodide-lock.d.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,8 @@ interface PackageLock {
1616
[id: string]: PackageDeclaration;
1717
};
1818
}
19+
20+
declare module 'pyodide-internal:generated/pyodide-lock.json' {
21+
const lock: PackageLock;
22+
export default lock;
23+
}

src/pyodide/types/runtime-generated/metadata.d.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ declare namespace MetadataReader {
1717
const disposeMemorySnapshot: () => void;
1818
const shouldUsePackagesInArtifactBundler: () => boolean;
1919
const getPackagesVersion: () => string;
20-
const getPackagesLock: () => string;
2120
const read: (index: number, position: number, buffer: Uint8Array) => number;
2221
}
2322

src/workerd/api/pyodide/pyodide.c++

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "pyodide.h"
55

66
#include <workerd/api/pyodide/setup-emscripten.h>
7+
#include <workerd/io/compatibility-date.h>
78
#include <workerd/util/string-buffer.h>
89
#include <workerd/util/strings.h>
910

@@ -22,6 +23,21 @@ const kj::Maybe<jsg::Bundle::Reader> PyodideBundleManager::getPyodideBundle(
2223
[](const MessageBundlePair& t) { return t.bundle; });
2324
}
2425

26+
kj::Maybe<kj::String> PyodideBundleManager::getPyodideLock(
27+
PythonSnapshotRelease::Reader pythonSnapshotRelease) const {
28+
auto bundleName = getPythonBundleName(pythonSnapshotRelease);
29+
// We expect the Pyodide Bundle for the specified bundle name to already be downloaded here.
30+
auto bundle = KJ_ASSERT_NONNULL(getPyodideBundle(bundleName));
31+
for (auto module: bundle.getModules()) {
32+
if (module.which() == workerd::jsg::Module::JSON &&
33+
module.getName() == "pyodide-internal:generated/pyodide-lock.json") {
34+
return kj::str(module.getJson());
35+
}
36+
}
37+
38+
return kj::none;
39+
}
40+
2541
void PyodideBundleManager::setPyodideBundleData(
2642
kj::String version, kj::Array<unsigned char> data) const {
2743
auto wordArray = kj::arrayPtr(
@@ -440,8 +456,7 @@ jsg::Ref<PyodideMetadataReader> makePyodideMetadataReader(
440456
names.finish(),
441457
contents.finish(),
442458
requirements.finish(),
443-
kj::str("20240829.4"), // TODO: hardcoded version & lock
444-
kj::str(PYODIDE_LOCK.toString()),
459+
kj::str("20240829.4"), // TODO: hardcoded version
445460
true /* isWorkerd */,
446461
false /* isTracing */,
447462
snapshotToDisk,

src/workerd/api/pyodide/pyodide.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class PyodideBundleManager {
2727
public:
2828
void setPyodideBundleData(kj::String version, kj::Array<unsigned char> data) const;
2929
const kj::Maybe<jsg::Bundle::Reader> getPyodideBundle(kj::StringPtr version) const;
30+
kj::Maybe<kj::String> getPyodideLock(PythonSnapshotRelease::Reader pythonSnapshotRelease) const;
3031

3132
private:
3233
struct MessageBundlePair {
@@ -80,7 +81,6 @@ class PyodideMetadataReader: public jsg::Object {
8081
kj::Array<kj::Array<kj::byte>> contents;
8182
kj::Array<kj::String> requirements;
8283
kj::String packagesVersion;
83-
kj::String packagesLock;
8484
bool isWorkerdFlag;
8585
bool isTracingFlag;
8686
bool snapshotToDisk;
@@ -94,7 +94,6 @@ class PyodideMetadataReader: public jsg::Object {
9494
kj::Array<kj::Array<kj::byte>> contents,
9595
kj::Array<kj::String> requirements,
9696
kj::String packagesVersion,
97-
kj::String packagesLock,
9897
bool isWorkerd,
9998
bool isTracing,
10099
bool snapshotToDisk,
@@ -106,7 +105,6 @@ class PyodideMetadataReader: public jsg::Object {
106105
contents(kj::mv(contents)),
107106
requirements(kj::mv(requirements)),
108107
packagesVersion(kj::mv(packagesVersion)),
109-
packagesLock(kj::mv(packagesLock)),
110108
isWorkerdFlag(isWorkerd),
111109
isTracingFlag(isTracing),
112110
snapshotToDisk(snapshotToDisk),
@@ -169,10 +167,6 @@ class PyodideMetadataReader: public jsg::Object {
169167
return kj::str(packagesVersion);
170168
}
171169

172-
kj::String getPackagesLock() {
173-
return kj::str(packagesLock);
174-
}
175-
176170
JSG_RESOURCE_TYPE(PyodideMetadataReader) {
177171
JSG_METHOD(isWorkerd);
178172
JSG_METHOD(isTracing);
@@ -189,7 +183,6 @@ class PyodideMetadataReader: public jsg::Object {
189183
JSG_METHOD(shouldSnapshotToDisk);
190184
JSG_METHOD(shouldUsePackagesInArtifactBundler);
191185
JSG_METHOD(getPackagesVersion);
192-
JSG_METHOD(getPackagesLock);
193186
JSG_METHOD(isCreatingBaselineSnapshot);
194187
}
195188

0 commit comments

Comments
 (0)