Skip to content

Commit 757ae6d

Browse files
committed
controllers/krate/publish: Validate JSON metadata before reading tarball
Previously, the `BytesRequest` allocated memory for the full tarball (and the JSON metadata blob) before even validating that the `name` and `vers` fields of the JSON metadata correspond to a crate and version that the user has publish access to. This commit changes the code to first read the JSON metadata from the request body stream, validate it, and then read the tarball bytes afterwards.
1 parent 154e004 commit 757ae6d

File tree

4 files changed

+87
-53
lines changed

4 files changed

+87
-53
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ tempfile = "=3.14.0"
114114
thiserror = "=2.0.3"
115115
tokio = { version = "=1.41.1", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread", "macros", "process"]}
116116
tokio-postgres = "=0.7.12"
117+
tokio-util = "=0.7.12"
117118
toml = "=0.8.19"
118119
tower = "=0.5.1"
119120
tower-http = { version = "=0.6.2", features = ["add-extension", "fs", "catch-panic", "timeout", "compression-full"] }

src/controllers/krate/publish.rs

Lines changed: 65 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::auth::AuthCheck;
55
use crate::worker::jobs::{
66
self, CheckTyposquat, SendPublishNotificationsJob, UpdateDefaultVersion,
77
};
8-
use axum::body::Bytes;
8+
use axum::body::{Body, Bytes};
99
use axum::Json;
1010
use cargo_manifest::{Dependency, DepsSet, TargetDepsSet};
1111
use chrono::{DateTime, SecondsFormat, Utc};
@@ -17,10 +17,12 @@ use diesel_async::scoped_futures::ScopedFutureExt;
1717
use diesel_async::{AsyncConnection, AsyncPgConnection, RunQueryDsl};
1818
use futures_util::TryStreamExt;
1919
use hex::ToHex;
20+
use http::request::Parts;
2021
use http::StatusCode;
21-
use hyper::body::Buf;
2222
use sha2::{Digest, Sha256};
2323
use std::collections::HashMap;
24+
use tokio::io::AsyncReadExt;
25+
use tokio_util::io::StreamReader;
2426
use url::Url;
2527

2628
use crate::models::{
@@ -35,7 +37,7 @@ use crate::rate_limiter::LimitedAction;
3537
use crate::schema::*;
3638
use crate::sql::canon_crate_name;
3739
use crate::util::errors::{bad_request, custom, internal, AppResult, BoxedAppError};
38-
use crate::util::{BytesRequest, Maximums};
40+
use crate::util::Maximums;
3941
use crate::views::{
4042
EncodableCrate, EncodableCrateDependency, GoodCrate, PublishMetadata, PublishWarnings,
4143
};
@@ -54,13 +56,50 @@ const MAX_DESCRIPTION_LENGTH: usize = 1000;
5456
/// Currently blocks the HTTP thread, perhaps some function calls can spawn new
5557
/// threads and return completion or error through other methods a `cargo publish
5658
/// --status` command, via crates.io's front end, or email.
57-
pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCrate>> {
58-
let (req, bytes) = req.0.into_parts();
59-
let (json_bytes, tarball_bytes) = split_body(bytes)?;
59+
pub async fn publish(app: AppState, req: Parts, body: Body) -> AppResult<Json<GoodCrate>> {
60+
let stream = body.into_data_stream();
61+
let stream = stream.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err));
62+
let mut reader = StreamReader::new(stream);
63+
64+
// The format of the req.body() of a publish request is as follows:
65+
//
66+
// metadata length
67+
// metadata in JSON about the crate being published
68+
// .crate tarball length
69+
// .crate tarball file
70+
71+
const MAX_JSON_LENGTH: u32 = 1024 * 1024; // 1 MB
72+
73+
let json_len = reader.read_u32_le().await.map_err(|e| {
74+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
75+
bad_request("invalid metadata length")
76+
} else {
77+
e.into()
78+
}
79+
})?;
80+
81+
if json_len > MAX_JSON_LENGTH {
82+
return Err(custom(
83+
StatusCode::PAYLOAD_TOO_LARGE,
84+
"JSON metadata blob too large",
85+
));
86+
}
87+
88+
let mut json_bytes = vec![0; json_len as usize];
89+
reader.read_exact(&mut json_bytes).await.map_err(|e| {
90+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
91+
let message = format!("invalid metadata length for remaining payload: {json_len}");
92+
bad_request(message)
93+
} else {
94+
e.into()
95+
}
96+
})?;
6097

6198
let metadata: PublishMetadata = serde_json::from_slice(&json_bytes)
6299
.map_err(|e| bad_request(format_args!("invalid upload request: {e}")))?;
63100

101+
drop(json_bytes);
102+
64103
Crate::validate_crate_name("crate", &metadata.name).map_err(bad_request)?;
65104

66105
let semver = match semver::Version::parse(&metadata.vers) {
@@ -136,7 +175,14 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
136175
.check_rate_limit(auth.user().id, rate_limit_action, &mut conn)
137176
.await?;
138177

139-
let content_length = tarball_bytes.len() as u64;
178+
let tarball_len = reader.read_u32_le().await.map_err(|e| {
179+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
180+
bad_request("invalid tarball length")
181+
} else {
182+
e.into()
183+
}
184+
})?;
185+
let content_length = tarball_len as u64;
140186

141187
let maximums = Maximums::new(
142188
existing_crate.as_ref().and_then(|c| c.max_upload_size),
@@ -151,6 +197,18 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
151197
));
152198
}
153199

200+
let mut tarball_bytes = vec![0; tarball_len as usize];
201+
reader.read_exact(&mut tarball_bytes).await.map_err(|e| {
202+
if e.kind() == std::io::ErrorKind::UnexpectedEof {
203+
let message = format!("invalid tarball length for remaining payload: {tarball_len}");
204+
bad_request(message)
205+
} else {
206+
e.into()
207+
}
208+
})?;
209+
210+
let tarball_bytes = Bytes::from(tarball_bytes);
211+
154212
let pkg_name = format!("{}-{}", &*metadata.name, &version_string);
155213
let tarball_info =
156214
process_tarball(&pkg_name, &*tarball_bytes, maximums.max_unpack_size).await?;
@@ -573,46 +631,6 @@ async fn count_versions_published_today(
573631
.await
574632
}
575633

576-
#[instrument(skip_all)]
577-
fn split_body(mut bytes: Bytes) -> AppResult<(Bytes, Bytes)> {
578-
// The format of the req.body() of a publish request is as follows:
579-
//
580-
// metadata length
581-
// metadata in JSON about the crate being published
582-
// .crate tarball length
583-
// .crate tarball file
584-
585-
if bytes.len() < 4 {
586-
// Avoid panic in `get_u32_le()` if there is not enough remaining data
587-
return Err(bad_request("invalid metadata length"));
588-
}
589-
590-
let json_len = bytes.get_u32_le() as usize;
591-
if json_len > bytes.len() {
592-
return Err(bad_request(format!(
593-
"invalid metadata length for remaining payload: {json_len}"
594-
)));
595-
}
596-
597-
let json_bytes = bytes.split_to(json_len);
598-
599-
if bytes.len() < 4 {
600-
// Avoid panic in `get_u32_le()` if there is not enough remaining data
601-
return Err(bad_request("invalid tarball length"));
602-
}
603-
604-
let tarball_len = bytes.get_u32_le() as usize;
605-
if tarball_len > bytes.len() {
606-
return Err(bad_request(format!(
607-
"invalid tarball length for remaining payload: {tarball_len}"
608-
)));
609-
}
610-
611-
let tarball_bytes = bytes.split_to(tarball_len);
612-
613-
Ok((json_bytes, tarball_bytes))
614-
}
615-
616634
async fn is_reserved_name(name: &str, conn: &mut AsyncPgConnection) -> QueryResult<bool> {
617635
select(exists(reserved_crate_names::table.filter(
618636
canon_crate_name(reserved_crate_names::name).eq(canon_crate_name(name)),

src/tests/krate/publish/tarball.rs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::tests::builders::PublishBuilder;
22
use crate::tests::util::{RequestHelper, TestApp};
3+
use bytes::{BufMut, BytesMut};
34
use crates_io_tarball::TarballBuilder;
45
use googletest::prelude::*;
56
use http::StatusCode;
@@ -80,9 +81,16 @@ async fn json_bytes_truncated() {
8081
async fn tarball_len_truncated() {
8182
let (app, _, _, token) = TestApp::full().with_token().await;
8283

83-
let response = token
84-
.publish_crate(&[2, 0, 0, 0, b'{', b'}', 0, 0] as &[u8])
85-
.await;
84+
let json = br#"{ "name": "foo", "vers": "1.0.0" }"#;
85+
86+
let mut bytes = BytesMut::new();
87+
bytes.put_u32_le(json.len() as u32);
88+
bytes.put_slice(json);
89+
bytes.put_u8(0);
90+
bytes.put_u8(0);
91+
92+
let response = token.publish_crate(bytes.freeze()).await;
93+
8694
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
8795
assert_snapshot!(response.text(), @r#"{"errors":[{"detail":"invalid tarball length"}]}"#);
8896
assert_that!(app.stored_files().await, empty());
@@ -92,9 +100,15 @@ async fn tarball_len_truncated() {
92100
async fn tarball_bytes_truncated() {
93101
let (app, _, _, token) = TestApp::full().with_token().await;
94102

95-
let response = token
96-
.publish_crate(&[2, 0, 0, 0, b'{', b'}', 100, 0, 0, 0, 0] as &[u8])
97-
.await;
103+
let json = br#"{ "name": "foo", "vers": "1.0.0" }"#;
104+
105+
let mut bytes = BytesMut::new();
106+
bytes.put_u32_le(json.len() as u32);
107+
bytes.put_slice(json);
108+
bytes.put_u32_le(100);
109+
bytes.put_u8(0);
110+
111+
let response = token.publish_crate(bytes.freeze()).await;
98112
assert_eq!(response.status(), StatusCode::BAD_REQUEST);
99113
assert_snapshot!(response.text(), @r#"{"errors":[{"detail":"invalid tarball length for remaining payload: 100"}]}"#);
100114
assert_that!(app.stored_files().await, empty());

0 commit comments

Comments
 (0)