Skip to content

Commit 89e1b4f

Browse files
committed
Support manifest for WASMFS fetchfs
This way, a fetchfs WASMFS backend can have a mapping from paths to the URLs used to satisfy those paths. I think this makes more sense than creating a separate fetchfs backend for each individual file. In the absence of a manifest, the old behavior of using the file path as a relative path against the baseURL will still be used. NOTE: it's important that the fetchfs has a dedicated directory created ("mounted") for it, or else the "create file with path" code path will never be used. In other words, paths like `/whatever.txt` can never work given the way WASMFS is implemented; the accessed files don't get paths unless the parent directory and child path have the same backend. So, creating the directory at `/dat` and having a manifest entry like `/whatever.txt` is the way to go for a file at `/dat/whatever.txt`. Manifest entries are automatically used to mount/create file contents, so there's no need to use multiple calls to wasmfs_create_file or wasmfs_create_directory; only the one directory needs to be created (which is what FS.mount does in the JS API).
1 parent 6f0aaba commit 89e1b4f

File tree

7 files changed

+170
-50
lines changed

7 files changed

+170
-50
lines changed

src/library_fetchfs.js

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,25 @@
55
*/
66

77
addToLibrary({
8-
$FETCHFS__deps: ['$stringToUTF8OnStack', 'wasmfs_create_fetch_backend'],
8+
$FETCHFS__deps: ['$stringToUTF8OnStack', 'wasmfs_create_fetch_backend', 'wasmfs_fetch_create_manifest', 'wasmfs_fetch_add_to_manifest'],
99
$FETCHFS: {
1010
createBackend(opts) {
11-
return _wasmfs_create_fetch_backend(stringToUTF8OnStack(opts.base_url), opts.chunkSize | 0);
12-
}
11+
if(opts.base_url === undefined) {
12+
opts.base_url = "";
13+
}
14+
return withStackSave(() => {
15+
var manifest = 0;
16+
if (opts['manifest']) {
17+
manifest = _wasmfs_fetch_create_manifest();
18+
Object.entries(opts['manifest']).forEach(([path, url]) => {
19+
_wasmfs_fetch_add_to_manifest(manifest,
20+
stringToUTF8OnStack(path),
21+
stringToUTF8OnStack(url));
22+
});
23+
}
24+
return _wasmfs_create_fetch_backend(stringToUTF8OnStack(opts.base_url), opts.chunkSize | 0, manifest);
25+
});
26+
},
1327
},
1428
});
1529

src/library_wasmfs_fetch.js

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ addToLibrary({
3131
try {
3232
var u = new URL(fileUrl, self.location.origin);
3333
url = u.toString();
34-
} catch (e) {
34+
} catch(_e) {
35+
throw {status: 404};
3536
}
3637
}
3738
var chunkSize = __wasmfs_fetch_get_chunk_size(file);
@@ -41,15 +42,15 @@ addToLibrary({
4142
var lastChunk = ((offset+len) / chunkSize) | 0;
4243
if (!(file in wasmFS$JSMemoryRanges)) {
4344
var fileInfo = await fetch(url,{method:"HEAD", headers:{"Range": "bytes=0-"}});
44-
if(fileInfo.ok &&
45+
if (fileInfo.ok &&
4546
fileInfo.headers.has("Content-Length") &&
4647
fileInfo.headers.get("Accept-Ranges") == "bytes" &&
4748
(parseInt(fileInfo.headers.get("Content-Length")) > chunkSize*2)) {
4849
wasmFS$JSMemoryRanges[file] = {size:parseInt(fileInfo.headers.get("Content-Length")), chunks:[], chunkSize:chunkSize};
4950
} else {
5051
// may as well/forced to download the whole file
5152
var wholeFileReq = await fetch(url);
52-
if(!wholeFileReq.ok) {
53+
if (!wholeFileReq.ok) {
5354
throw wholeFileReq;
5455
}
5556
var wholeFileData = new Uint8Array(await wholeFileReq.arrayBuffer());
@@ -60,11 +61,11 @@ addToLibrary({
6061
}
6162
var allPresent = true;
6263
var i;
63-
if(lastChunk * chunkSize < offset+len) {
64+
if (lastChunk * chunkSize < offset+len) {
6465
lastChunk += 1;
6566
}
66-
for(i = firstChunk; i < lastChunk; i++) {
67-
if(!wasmFS$JSMemoryRanges[file].chunks[i]) {
67+
for (i = firstChunk; i < lastChunk; i++) {
68+
if (!wasmFS$JSMemoryRanges[file].chunks[i]) {
6869
allPresent = false;
6970
break;
7071
}
@@ -78,14 +79,13 @@ addToLibrary({
7879
var start = firstChunk*chunkSize;
7980
var end = lastChunk*chunkSize;
8081
var response = await fetch(url, {headers:{"Range": `bytes=${start}-${end-1}`}});
81-
if (response.ok) {
82-
var bytes = new Uint8Array(await response['arrayBuffer']());
83-
for (i = firstChunk; i < lastChunk; i++) {
84-
wasmFS$JSMemoryRanges[file].chunks[i] = bytes.slice(i*chunkSize-start,(i+1)*chunkSize-start);
85-
}
86-
} else {
82+
if (!response.ok) {
8783
throw response;
8884
}
85+
var bytes = new Uint8Array(await response['arrayBuffer']());
86+
for (i = firstChunk; i < lastChunk; i++) {
87+
wasmFS$JSMemoryRanges[file].chunks[i] = bytes.slice(i*chunkSize-start,(i+1)*chunkSize-start);
88+
}
8989
return Promise.resolve();
9090
}
9191

@@ -110,24 +110,21 @@ addToLibrary({
110110
read: async (file, buffer, length, offset) => {
111111
try {
112112
await getFileRange(file, offset || 0, length);
113-
} catch (response) {
114-
return response.status === 404 ? -{{{ cDefs.ENOENT }}} : -{{{ cDefs.EBADF }}};
113+
} catch (failedResponse) {
114+
return failedResponse.status === 404 ? -{{{ cDefs.ENOENT }}} : -{{{ cDefs.EBADF }}};
115115
}
116116
var fileInfo = wasmFS$JSMemoryRanges[file];
117117
var fileData = fileInfo.chunks;
118118
var chunkSize = fileInfo.chunkSize;
119119
var firstChunk = (offset / chunkSize) | 0;
120120
var lastChunk = ((offset+length) / chunkSize) | 0;
121-
if(offset + length > lastChunk * chunkSize) {
121+
if (offset + length > lastChunk * chunkSize) {
122122
lastChunk += 1;
123123
}
124124
var readLength = 0;
125125
for (var i = firstChunk; i < lastChunk; i++) {
126126
var chunk = fileData[i];
127127
var start = Math.max(i*chunkSize, offset);
128-
if(!chunk) {
129-
throw [fileData.length, firstChunk, lastChunk, i];
130-
}
131128
var chunkStart = i*chunkSize;
132129
var end = Math.min(chunkStart+chunkSize, offset+length);
133130
HEAPU8.set(chunk.subarray(start-chunkStart, end-chunkStart), buffer+(start-offset));
@@ -138,7 +135,7 @@ addToLibrary({
138135
getSize: async (file) => {
139136
try {
140137
await getFileRange(file, 0, 0);
141-
} catch (response) {
138+
} catch (failedResponse) {
142139
return 0;
143140
}
144141
return wasmFS$JSMemoryRanges[file].size;

system/include/emscripten/wasmfs.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,18 @@ backend_t wasmfs_create_memory_backend(void);
5757
//
5858
// TODO: Add an async version of this function that will work on the main
5959
// thread.
60-
backend_t wasmfs_create_fetch_backend(const char* base_url __attribute__((nonnull)), uint32_t);
60+
backend_t wasmfs_create_fetch_backend(const char* base_url __attribute__((nonnull)),
61+
uint32_t chunkSize,
62+
void *manifest);
63+
64+
// Create a FetchFS manifest record that can be populated and passed into
65+
// wasmfs_create_fetch_backend.
66+
void *wasmfs_fetch_create_manifest();
67+
68+
// Add a path to URL mapping to the given manifest.
69+
void wasmfs_fetch_add_to_manifest(void *manifest __attribute__((nonnull)),
70+
const char *path __attribute__((nonnull)),
71+
const char *url __attribute__((nonnull)));
6172

6273
backend_t wasmfs_create_node_backend(const char* root __attribute__((nonnull)));
6374

system/lib/wasmfs/backends/fetch_backend.cpp

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,35 @@ namespace wasmfs {
1414

1515
const uint32_t DEFAULT_CHUNK_SIZE = 16*1024*1024;
1616

17+
typedef std::map<std::string,std::string> FetchManifest;
18+
1719
class FetchBackend : public wasmfs::ProxiedAsyncJSBackend {
1820
std::string baseUrl;
1921
uint32_t chunkSize;
22+
FetchManifest *manifest;
2023
public:
24+
// Takes ownership of manifest
2125
FetchBackend(const std::string& baseUrl,
2226
uint32_t chunkSize,
27+
FetchManifest *manifest,
2328
std::function<void(backend_t)> setupOnThread)
24-
: ProxiedAsyncJSBackend(setupOnThread), baseUrl(baseUrl), chunkSize(chunkSize)
25-
// TODO manifest
29+
: ProxiedAsyncJSBackend(setupOnThread), baseUrl(baseUrl), chunkSize(chunkSize), manifest(manifest)
2630
{}
31+
~FetchBackend() {
32+
if(manifest != NULL) {
33+
delete manifest;
34+
}
35+
}
2736
std::shared_ptr<DataFile> createFile(mode_t mode) override;
2837
std::shared_ptr<Directory> createDirectory(mode_t mode) override;
2938
const std::string getFileURL(const std::string& filePath);
3039
uint32_t getChunkSize();
40+
const FetchManifest *getManifest() {
41+
return manifest;
42+
}
3143
};
3244

33-
45+
3446
class FetchFile : public ProxiedAsyncJSImplFile {
3547
std::string filePath;
3648
std::string fileUrl;
@@ -58,7 +70,45 @@ class FetchDirectory : public MemoryDirectory {
5870
mode_t mode,
5971
backend_t backend,
6072
emscripten::ProxyWorker& proxy)
61-
: MemoryDirectory(mode, backend), dirPath(path), proxy(proxy) {}
73+
: MemoryDirectory(mode, backend), dirPath(path), proxy(proxy) {
74+
auto manifest = dynamic_cast<FetchBackend*>(getBackend())->getManifest();
75+
if (manifest && path == "") {
76+
for (const auto& pair : *manifest) {
77+
auto path = pair.first;
78+
assert(path[0] == '/');
79+
char delimiter = '/';
80+
std::string path_so_far = "";
81+
std::string tmp = "";
82+
std::shared_ptr<FetchDirectory> dir = NULL;
83+
std::istringstream iss(path);
84+
while(std::getline(iss, tmp, delimiter)) {
85+
path_so_far += tmp;
86+
if (path_so_far == path) {
87+
if(!dir) {
88+
assert(this->insertDataFile(tmp, 0777));
89+
} else {
90+
assert(dir->insertDataFile(tmp, 0777));
91+
}
92+
} else if (path_so_far != "") {
93+
std::shared_ptr<FetchDirectory> next = NULL;
94+
if(!dir) {
95+
next = std::dynamic_pointer_cast<FetchDirectory>(this->getChild(tmp));
96+
} else {
97+
next = std::dynamic_pointer_cast<FetchDirectory>(dir->getChild(tmp));
98+
}
99+
if (next) {
100+
dir = next;
101+
assert(dir);
102+
} else {
103+
dir = std::dynamic_pointer_cast<FetchDirectory>(dir->insertDirectory(tmp, 0777));
104+
assert(dir);
105+
}
106+
}
107+
path_so_far += delimiter;
108+
}
109+
}
110+
}
111+
}
62112

63113
std::shared_ptr<DataFile> insertDataFile(const std::string& name,
64114
mode_t mode) override {
@@ -81,6 +131,10 @@ class FetchDirectory : public MemoryDirectory {
81131
std::string getChildPath(const std::string& name) const {
82132
return dirPath + '/' + name;
83133
}
134+
135+
std::shared_ptr<File> getChild(const std::string& name) override {
136+
return MemoryDirectory::getChild(name);
137+
}
84138
};
85139

86140
std::shared_ptr<DataFile> FetchBackend::createFile(mode_t mode) {
@@ -92,41 +146,60 @@ std::shared_ptr<Directory> FetchBackend::createDirectory(mode_t mode) {
92146
}
93147

94148
const std::string FetchBackend::getFileURL(const std::string& filePath) {
95-
// TODO use manifest
149+
if(manifest) {
150+
if (auto search = manifest->find(filePath); search != manifest->end()) {
151+
return baseUrl + "/" + search->second;
152+
}
153+
}
96154
if(filePath == "") {
97155
return baseUrl;
98156
}
99157
return baseUrl + "/" + filePath;
100158
}
159+
101160
uint32_t FetchBackend::getChunkSize() {
102161
return chunkSize;
103162
}
104163

105164
extern "C" {
106-
backend_t wasmfs_create_fetch_backend(const char* base_url, uint32_t chunkSize /* TODO manifest */) {
165+
backend_t wasmfs_create_fetch_backend(const char* base_url, uint32_t chunkSize, FetchManifest *manifest) {
107166
// ProxyWorker cannot safely be synchronously spawned from the main browser
108167
// thread. See comment in thread_utils.h for more details.
109168
assert(!emscripten_is_main_browser_thread() &&
110169
"Cannot safely create fetch backend on main browser thread");
111170
return wasmFS.addBackend(std::make_unique<FetchBackend>(
112171
base_url ? base_url : "",
113-
chunkSize != 0 ? chunkSize : DEFAULT_CHUNK_SIZE,
114-
/* TODO manifest */
172+
chunkSize ? chunkSize : DEFAULT_CHUNK_SIZE,
173+
manifest,
115174
[](backend_t backend) { _wasmfs_create_fetch_backend_js(backend); }));
116175
}
117176

118-
const char* EMSCRIPTEN_KEEPALIVE _wasmfs_fetch_get_file_path(void* ptr) {
177+
const char* _wasmfs_fetch_get_file_path(void* ptr) {
119178
auto* file = reinterpret_cast<wasmfs::FetchFile*>(ptr);
120179
return file ? file->getPath().data() : nullptr;
121180
}
122-
const char* EMSCRIPTEN_KEEPALIVE _wasmfs_fetch_get_file_url(void* ptr) {
181+
182+
const char* _wasmfs_fetch_get_file_url(void* ptr) {
123183
auto* file = reinterpret_cast<wasmfs::FetchFile*>(ptr);
124184
return file ? file->getURL().data() : nullptr;
125185
}
126-
uint32_t EMSCRIPTEN_KEEPALIVE _wasmfs_fetch_get_chunk_size(void* ptr) {
186+
187+
uint32_t _wasmfs_fetch_get_chunk_size(void* ptr) {
127188
auto* file = reinterpret_cast<wasmfs::FetchFile*>(ptr);
128189
return file ? file->getChunkSize() : DEFAULT_CHUNK_SIZE;
129190
}
191+
192+
void *EMSCRIPTEN_KEEPALIVE wasmfs_fetch_create_manifest() {
193+
return new FetchManifest();
194+
}
195+
196+
void EMSCRIPTEN_KEEPALIVE wasmfs_fetch_add_to_manifest(void *manifest_ptr, const char *path, const char *url) {
197+
auto* manifest = reinterpret_cast<FetchManifest *>(manifest_ptr);
198+
auto path_str = std::string(path);
199+
auto url_str = std::string(url);
200+
manifest->insert(std::pair(path_str, url_str));
201+
}
202+
130203
}
131204

132205
} // namespace wasmfs

system/lib/wasmfs/backends/fetch_backend.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@
66
#include "wasmfs.h"
77

88
extern "C" {
9-
// See library_wasmfs_fetch.js
10-
void _wasmfs_create_fetch_backend_js(wasmfs::backend_t);
9+
// See library_wasmfs_fetch.js
10+
void _wasmfs_create_fetch_backend_js(wasmfs::backend_t);
1111
}

test/common.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2013,7 +2013,7 @@ def send_head(self):
20132013
f = open(path, 'rb')
20142014
fs = os.fstat(f.fileno())
20152015
except IOError:
2016-
self.send_error(404, "File not found: " + path)
2016+
self.send_error(404, f'File not found {path}')
20172017
return None
20182018
if self.path.endswith('.js'):
20192019
self.send_response(200)
@@ -2022,16 +2022,16 @@ def send_head(self):
20222022
self.send_header('Content-length', fs[6])
20232023
self.end_headers()
20242024
return f
2025-
elif self.headers.get("Range"):
2025+
elif self.headers.get('Range'):
20262026
self.send_response(206)
20272027
ctype = self.guess_type(path)
20282028
self.send_header('Content-Type', ctype)
2029-
pieces = self.headers.get("Range").split("=")[1].split("-")
2029+
pieces = self.headers.get('Range').split('=')[1].split('-')
20302030
start = int(pieces[0]) if pieces[0] != '' else 0
20312031
end = int(pieces[1]) if pieces[1] != '' else fs[6] - 1
20322032
end = min(fs[6] - 1, end)
20332033
length = end - start + 1
2034-
self.send_header('Content-Range', "bytes " + str(start) + "-" + str(end) + "/" + str(fs[6]))
2034+
self.send_header('Content-Range', f'bytes {start}-{end}/{fs[6]}')
20352035
self.send_header('Content-Length', str(length))
20362036
self.end_headers()
20372037
return f
@@ -2135,19 +2135,19 @@ def do_GET(self):
21352135
# Use SimpleHTTPServer default file serving operation for GET.
21362136
if DEBUG:
21372137
print('[simple HTTP serving:', unquote_plus(self.path), ']')
2138-
if self.headers.get("Range"):
2138+
if self.headers.get('Range'):
21392139
self.send_response(206)
21402140
path = self.translate_path(self.path)
21412141
data = read_binary(path)
21422142
ctype = self.guess_type(path)
21432143
self.send_header('Content-type', ctype)
2144-
pieces = self.headers.get("range").split("=")[1].split("-")
2144+
pieces = self.headers.get('Range').split('=')[1].split('-')
21452145
start = int(pieces[0]) if pieces[0] != '' else 0
21462146
end = int(pieces[1]) if pieces[1] != '' else len(data) - 1
21472147
end = min(len(data) - 1, end)
21482148
length = end - start + 1
21492149
self.send_header('Content-Length', str(length))
2150-
self.send_header('Content-Range', "bytes " + str(start) + "-" + str(end) + "/" + str(len(data)))
2150+
self.send_header('Content-Range', f'bytes {start}-{end}/{len(data)}')
21512151
self.end_headers()
21522152
self.wfile.write(data[start:end + 1])
21532153
else:

0 commit comments

Comments
 (0)