Skip to content

Commit 0bf7e80

Browse files
committed
hub: refactor file download handler and make hub proxy error handling more robust
1 parent 9ad93b7 commit 0bf7e80

File tree

3 files changed

+84
-53
lines changed

3 files changed

+84
-53
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import { readFile as readProjectFile } from "@cocalc/conat/files/read";
2+
import { once } from "events";
3+
import { path_split } from "@cocalc/util/misc";
4+
import mime from "mime-types";
5+
import getLogger from "../logger";
6+
7+
const DANGEROUS_CONTENT_TYPE = new Set(["image/svg+xml" /*, "text/html"*/]);
8+
9+
const logger = getLogger("hub:proxy:file-download");
10+
11+
// assumes request has already been authenticated!
12+
13+
export async function handleFileDownload(req, res, url, project_id) {
14+
logger.debug("handling the request via conat file streaming", url);
15+
const i = url.indexOf("files/");
16+
const compute_server_id = req.query.id ?? 0;
17+
let j = url.lastIndexOf("?");
18+
if (j == -1) {
19+
j = url.length;
20+
}
21+
const path = decodeURIComponent(url.slice(i + "files/".length, j));
22+
logger.debug("conat: get file", { project_id, path, compute_server_id, url });
23+
const fileName = path_split(path).tail;
24+
const contentType = mime.lookup(fileName);
25+
if (req.query.download != null || DANGEROUS_CONTENT_TYPE.has(contentType)) {
26+
const fileNameEncoded = encodeURIComponent(fileName)
27+
.replace(/['()]/g, escape)
28+
.replace(/\*/g, "%2A");
29+
res.setHeader(
30+
"Content-disposition",
31+
`attachment; filename*=UTF-8''${fileNameEncoded}`,
32+
);
33+
}
34+
res.setHeader("Content-type", contentType);
35+
36+
let headersSent = false;
37+
res.on("finish", () => {
38+
headersSent = true;
39+
});
40+
try {
41+
for await (const chunk of await readProjectFile({
42+
project_id,
43+
compute_server_id,
44+
path,
45+
// allow a long download time (1 hour), since files can be large and
46+
// networks can be slow.
47+
maxWait: 1000 * 60 * 60,
48+
})) {
49+
if (res.writableEnded || res.destroyed) {
50+
break;
51+
}
52+
if (!res.write(chunk)) {
53+
await once(res, "drain");
54+
}
55+
}
56+
res.end();
57+
} catch (err) {
58+
logger.debug(
59+
"ERROR streaming file",
60+
{ project_id, compute_server_id, path },
61+
err,
62+
);
63+
if (!headersSent) {
64+
res.statusCode = 500;
65+
res.end("Error reading file.");
66+
} else {
67+
// Data sent, forcibly kill the connection
68+
res.destroy(err);
69+
}
70+
}
71+
}

src/packages/hub/proxy/handle-request.ts

Lines changed: 10 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,8 @@ import { stripBasePath } from "./util";
1010
import { ProjectControlFunction } from "@cocalc/server/projects/control";
1111
import siteUrl from "@cocalc/database/settings/site-url";
1212
import { parseReq } from "./parse";
13-
import { readFile as readProjectFile } from "@cocalc/conat/files/read";
14-
import { path_split } from "@cocalc/util/misc";
15-
import { once } from "@cocalc/util/async-utils";
1613
import hasAccess from "./check-for-access-to-project";
17-
import mime from "mime-types";
18-
19-
const DANGEROUS_CONTENT_TYPE = new Set(["image/svg+xml" /*, "text/html"*/]);
14+
import { handleFileDownload } from "./file-download";
2015

2116
const logger = getLogger("proxy:handle-request");
2217

@@ -84,7 +79,6 @@ export default function init({ projectControl, isPersonal }: Options) {
8479
// TODO: parseReq is called again in getTarget so need to refactor...
8580
const { type, project_id } = parsed;
8681
if (type == "files") {
87-
dbg("handling the request via conat file streaming");
8882
if (
8983
!(await hasAccess({
9084
project_id,
@@ -96,43 +90,7 @@ export default function init({ projectControl, isPersonal }: Options) {
9690
) {
9791
throw Error(`user does not have read access to project`);
9892
}
99-
const i = url.indexOf("files/");
100-
const compute_server_id = req.query.id ?? 0;
101-
let j = url.lastIndexOf("?");
102-
if (j == -1) {
103-
j = url.length;
104-
}
105-
const path = decodeURIComponent(url.slice(i + "files/".length, j));
106-
dbg("conat: get file", { project_id, path, compute_server_id, url });
107-
const fileName = path_split(path).tail;
108-
const contentType = mime.lookup(fileName);
109-
if (
110-
req.query.download != null ||
111-
DANGEROUS_CONTENT_TYPE.has(contentType)
112-
) {
113-
const fileNameEncoded = encodeURIComponent(fileName)
114-
.replace(/['()]/g, escape)
115-
.replace(/\*/g, "%2A");
116-
res.setHeader(
117-
"Content-disposition",
118-
`attachment; filename*=UTF-8''${fileNameEncoded}`,
119-
);
120-
}
121-
res.setHeader("Content-type", contentType);
122-
for await (const chunk of await readProjectFile({
123-
project_id,
124-
compute_server_id,
125-
path,
126-
// allow a long download time (1 hour), since files can be large and
127-
// networks can be slow.
128-
maxWait: 1000 * 60 * 60,
129-
})) {
130-
if (!res.write(chunk)) {
131-
// backpressure -- wait for it to resolve
132-
await once(res, "drain");
133-
}
134-
}
135-
res.end();
93+
await handleFileDownload(req, res, url, project_id);
13694
return;
13795
}
13896

@@ -182,8 +140,14 @@ export default function init({ projectControl, isPersonal }: Options) {
182140
await handleProxyRequest(req, res);
183141
} catch (err) {
184142
const msg = `WARNING: error proxying request ${req.url} -- ${err}`;
185-
res.writeHead(426, { "Content-Type": "text/html" });
186-
res.end(msg);
143+
try {
144+
// this will fail if handleProxyRequest already wrote a header, so we
145+
// try/catch it.
146+
res.writeHead(500, { "Content-Type": "text/html" });
147+
} catch {}
148+
try {
149+
res.end(msg);
150+
} catch {}
187151
// Not something to log as an error -- just debug; it's normal for it to happen, e.g., when
188152
// a project isn't running.
189153
logger.debug(msg);

src/packages/hub/servers/express-app.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
The main hub express app.
33
*/
44

5-
import compression from "compression";
65
import cookieParser from "cookie-parser";
76
import express from "express";
87
import ms from "ms";
@@ -33,6 +32,9 @@ import basePath from "@cocalc/backend/base-path";
3332
import { initConatServer } from "@cocalc/server/conat/socketio";
3433
import { conatSocketioCount } from "@cocalc/backend/data";
3534

35+
// NOTE: we are not using compression because that interferes with streaming file download,
36+
// and could be generally confusing.
37+
3638
// Used for longterm caching of files. This should be in units of seconds.
3739
const MAX_AGE = Math.round(ms("10 days") / 1000);
3840
const SHORT_AGE = Math.round(ms("10 seconds") / 1000);
@@ -80,12 +82,6 @@ export default async function init(opts: Options): Promise<{
8082
app.use(vhostShare());
8183
}
8284

83-
// Enable compression, as suggested by
84-
// http://expressjs.com/en/advanced/best-practice-performance.html#use-gzip-compression
85-
// NOTE "Express runs everything in order" --
86-
// https://github.com/expressjs/compression/issues/35#issuecomment-77076170
87-
app.use(compression());
88-
8985
app.use(cookieParser());
9086

9187
// Install custom middleware to track response time metrics via prometheus

0 commit comments

Comments
 (0)