Skip to content

Commit ae94e53

Browse files
Devdutt Shenoi and nitisht authored
chore: update deps and improve readability of Cargo.toml (#1075)
* chore: update datafusion to v44.0.0
* chore: update `object_store` to v0.11.2
  Fixes included are mentioned in https://github.com/alamb/arrow-rs/blob/de87e2e7c0c9f4e3f8fe120c803ee3c0cb38f1d4/object_store/CHANGELOG.md
* style: organize deps better for maintainability
* style: fmt dep
* ci: use compatible version of rust
* chore: up MSRV to 1.83.0

---------

Signed-off-by: Nitish Tiwari <[email protected]>
Co-authored-by: Nitish Tiwari <[email protected]>
1 parent 8b73d42 commit ae94e53

File tree

10 files changed

+368
-227
lines changed

10 files changed

+368
-227
lines changed

Cargo.lock

Lines changed: 258 additions & 147 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 79 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -3,45 +3,85 @@ name = "parseable"
33
version = "1.7.0"
44
authors = ["Parseable Team <[email protected]>"]
55
edition = "2021"
6-
rust-version = "1.77.1"
6+
rust-version = "1.83.0"
77
categories = ["logging", "observability", "log analytics"]
88
build = "build.rs"
99

1010
[dependencies]
11-
### apache arrow/datafusion dependencies
12-
arrow-schema = { version = "53.0.0", features = ["serde"] }
11+
# Arrow and DataFusion ecosystem
1312
arrow-array = { version = "53.0.0" }
14-
arrow-json = "53.0.0"
13+
arrow-flight = { version = "53.0.0", features = ["tls"] }
1514
arrow-ipc = { version = "53.0.0", features = ["zstd"] }
15+
arrow-json = "53.0.0"
16+
arrow-schema = { version = "53.0.0", features = ["serde"] }
1617
arrow-select = "53.0.0"
17-
datafusion = "42.0.0"
18-
object_store = { version = "0.11.1", features = ["cloud", "aws", "azure"] }
18+
datafusion = "44.0.0"
19+
object_store = { version = "0.11.2", features = ["cloud", "aws", "azure"] }
1920
parquet = "53.0.0"
20-
arrow-flight = { version = "53.0.0", features = ["tls"] }
21-
tonic = { version = "0.12.3", features = ["tls", "transport", "gzip", "zstd"] }
22-
tonic-web = "0.12.3"
23-
tower-http = { version = "0.6.1", features = ["cors"] }
2421

25-
### actix dependencies
26-
actix-web-httpauth = "0.8"
27-
actix-web = { version = "4.9.0", features = ["rustls-0_22"] }
22+
# Web server and HTTP-related
2823
actix-cors = "0.7.0"
24+
actix-web = { version = "4.9.0", features = ["rustls-0_22"] }
25+
actix-web-httpauth = "0.8"
2926
actix-web-prometheus = { version = "0.1" }
3027
actix-web-static-files = "4.0"
28+
http = "0.2.7"
29+
http-auth-basic = "0.3.3"
3130
mime = "0.3.17"
31+
tonic = { version = "0.12.3", features = ["tls", "transport", "gzip", "zstd"] }
32+
tonic-web = "0.12.3"
33+
tower-http = { version = "0.6.1", features = ["cors"] }
34+
url = "2.4.0"
3235

33-
### other dependencies
34-
anyhow = { version = "1.0", features = ["backtrace"] }
36+
# Authentication and Security
3537
argon2 = "0.5.0"
36-
async-trait = "0.1.82"
3738
base64 = "0.22.0"
38-
lazy_static = "1.4"
39-
bytes = "1.4"
40-
byteorder = "1.4.3"
41-
bzip2 = { version = "*", features = ["static"] }
4239
cookie = "0.18.1"
40+
hex = "0.4"
41+
openid = { version = "0.15.0", default-features = false, features = ["rustls"] }
42+
rustls = "0.22.4"
43+
rustls-pemfile = "2.1.2"
44+
sha2 = "0.10.8"
45+
46+
# Serialization and Data Formats
47+
byteorder = "1.4.3"
48+
prost = "0.13.3"
49+
serde = { version = "1.0", features = ["rc", "derive"] }
50+
serde_json = "1.0"
51+
serde_repr = "0.1.17"
52+
53+
# Async and Runtime
54+
async-trait = "0.1.82"
55+
futures = "0.3"
56+
futures-util = "0.3.28"
57+
tokio = { version = "1.28", default-features = false, features = [
58+
"sync",
59+
"macros",
60+
"fs",
61+
] }
62+
tokio-stream = { version = "0.1", features = ["fs"] }
63+
64+
# Logging and Metrics
65+
opentelemetry-proto = { git = "https://github.com/parseablehq/opentelemetry-rust", branch = "fix-metrics-u64-serialization" }
66+
prometheus = { version = "0.13", features = ["process"] }
67+
prometheus-parse = "0.2.5"
68+
tracing = "0.1.41"
69+
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
70+
71+
# Time and Date
4372
chrono = "0.4"
4473
chrono-humanize = "0.2"
74+
humantime = "2.1.0"
75+
humantime-serde = "1.1"
76+
77+
# File System and I/O
78+
bzip2 = { version = "*", features = ["static"] }
79+
fs_extra = "1.3"
80+
path-clean = "1.0.1"
81+
relative-path = { version = "1.7", features = ["serde"] }
82+
xz2 = { version = "*", features = ["static"] }
83+
84+
# CLI and System
4585
clap = { version = "4.1", default-features = false, features = [
4686
"std",
4787
"color",
@@ -51,73 +91,50 @@ clap = { version = "4.1", default-features = false, features = [
5191
"cargo",
5292
"error-context",
5393
] }
54-
clokwerk = "0.4"
5594
crossterm = "0.28.1"
56-
derive_more = "0.99.18"
57-
fs_extra = "1.3"
58-
futures = "0.3"
59-
futures-util = "0.3.28"
60-
hex = "0.4"
6195
hostname = "0.4.0"
62-
http = "0.2.7"
63-
humantime-serde = "1.1"
64-
itertools = "0.13.0"
96+
human-size = "0.4"
6597
num_cpus = "1.15"
98+
sysinfo = "0.31.4"
99+
thread-priority = "1.0.0"
100+
uptime_lib = "0.3.0"
101+
102+
# Kafka
103+
rdkafka = { version = "0.36.2", default-features = false, features = ["tokio"] }
104+
105+
# Utility Libraries
106+
anyhow = { version = "1.0", features = ["backtrace"] }
107+
bytes = "1.4"
108+
clokwerk = "0.4"
109+
derive_more = "0.99.18"
110+
hashlru = { version = "0.11.0", features = ["serde"] }
111+
itertools = "0.13.0"
112+
lazy_static = "1.4"
113+
nom = "7.1.3"
66114
once_cell = "1.17.1"
67-
opentelemetry-proto = {git = "https://github.com/parseablehq/opentelemetry-rust", branch="fix-metrics-u64-serialization"}
68-
prometheus = { version = "0.13", features = ["process"] }
69115
rand = "0.8.5"
70116
regex = "1.7.3"
71-
relative-path = { version = "1.7", features = ["serde"] }
72117
reqwest = { version = "0.11.27", default-features = false, features = [
73118
"rustls-tls",
74119
"json",
75120
"gzip",
76121
"brotli",
77122
] } # cannot update cause rustls is not latest `see rustls`
78-
rustls = "0.22.4" # cannot update to 0.23 actix has not caught up yet
79-
rustls-pemfile = "2.1.2"
80123
semver = "1.0"
81-
serde = { version = "1.0", features = ["rc", "derive"] }
82-
serde_json = "1.0"
83124
static-files = "0.2"
84-
sysinfo = "0.31.4"
85125
thiserror = "2.0.0"
86-
thread-priority = "1.0.0"
87-
tokio = { version = "1.28", default-features = false, features = [
88-
"sync",
89-
"macros",
90-
"fs",
91-
] }
92-
tokio-stream = { version = "0.1", features = ["fs"] }
93-
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
94126
ulid = { version = "1.0", features = ["serde"] }
95-
uptime_lib = "0.3.0"
96127
xxhash-rust = { version = "0.8", features = ["xxh3"] }
97-
xz2 = { version = "*", features = ["static"] }
98-
nom = "7.1.3"
99-
humantime = "2.1.0"
100-
human-size = "0.4"
101-
openid = { version = "0.15.0", default-features = false, features = ["rustls"] }
102-
url = "2.4.0"
103-
http-auth-basic = "0.3.3"
104-
serde_repr = "0.1.17"
105-
hashlru = { version = "0.11.0", features = ["serde"] }
106-
path-clean = "1.0.1"
107-
prost = "0.13.3"
108-
prometheus-parse = "0.2.5"
109-
sha2 = "0.10.8"
110-
tracing = "0.1.41"
111128

112129
[build-dependencies]
113130
cargo_toml = "0.20.1"
131+
prost-build = "0.13.3"
114132
sha1_smol = { version = "1.0", features = ["std"] }
115133
static-files = "0.2"
116134
ureq = "2.6"
135+
url = "2.4.0"
117136
vergen = { version = "8.1", features = ["build", "git", "cargo", "gitcl"] }
118137
zip = { version = "2.2.0", default-features = false, features = ["deflate"] }
119-
url = "2.4.0"
120-
prost-build = "0.13.3"
121138

122139
[dev-dependencies]
123140
maplit = "1.0"

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1515

1616
# build stage
17-
FROM rust:1.77.1-bookworm as builder
17+
FROM rust:1.83.0-bookworm as builder
1818

1919
LABEL org.opencontainers.image.title="Parseable"
2020
LABEL maintainer="Parseable Team <[email protected]>"

src/query/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ use datafusion::arrow::record_batch::RecordBatch;
2727
use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeVisitor};
2828
use datafusion::error::DataFusionError;
2929
use datafusion::execution::disk_manager::DiskManagerConfig;
30-
use datafusion::execution::runtime_env::RuntimeEnv;
3130
use datafusion::execution::SessionStateBuilder;
3231
use datafusion::logical_expr::{Explain, Filter, LogicalPlan, PlanType, ToStringifiedPlan};
3332
use datafusion::prelude::*;
@@ -77,7 +76,7 @@ impl Query {
7776
};
7877

7978
let runtime_config = runtime_config.with_memory_limit(pool_size, fraction);
80-
let runtime = Arc::new(RuntimeEnv::new(runtime_config).unwrap());
79+
let runtime = Arc::new(runtime_config.build().unwrap());
8180

8281
let mut config = SessionConfig::default()
8382
.with_parquet_pruning(true)

src/query/stream_schema_provider.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use chrono::{DateTime, NaiveDateTime, Timelike, Utc};
3030
use datafusion::catalog::Session;
3131
use datafusion::common::stats::Precision;
3232
use datafusion::logical_expr::utils::conjunction;
33+
use datafusion::physical_expr::LexOrdering;
3334
use datafusion::{
3435
catalog::SchemaProvider,
3536
common::{
@@ -73,6 +74,7 @@ use super::listing_table_builder::ListingTableBuilder;
7374
use crate::catalog::Snapshot as CatalogSnapshot;
7475

7576
// schema provider for stream based on global data
77+
#[derive(Debug)]
7678
pub struct GlobalSchemaProvider {
7779
pub storage: Arc<dyn ObjectStorage>,
7880
}
@@ -159,7 +161,7 @@ impl StandardTableProvider {
159161
statistics,
160162
projection: projection.cloned(),
161163
limit,
162-
output_ordering: vec![vec![sort_expr]],
164+
output_ordering: vec![LexOrdering::from_iter([sort_expr])],
163165
table_partition_cols: Vec::new(),
164166
},
165167
filters.as_ref(),

src/storage/azure_blob.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use async_trait::async_trait;
2929
use datafusion::datasource::object_store::{
3030
DefaultObjectStoreRegistry, ObjectStoreRegistry, ObjectStoreUrl,
3131
};
32-
use datafusion::execution::runtime_env::RuntimeConfig;
32+
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
3333
use object_store::azure::{MicrosoftAzure, MicrosoftAzureBuilder};
3434
use object_store::{BackoffConfig, ClientOptions, ObjectStore, PutPayload, RetryConfig};
3535
use relative_path::{RelativePath, RelativePathBuf};
@@ -150,7 +150,7 @@ impl AzureBlobConfig {
150150
}
151151

152152
impl ObjectStorageProvider for AzureBlobConfig {
153-
fn get_datafusion_runtime(&self) -> RuntimeConfig {
153+
fn get_datafusion_runtime(&self) -> RuntimeEnvBuilder {
154154
let azure = self.get_default_builder().build().unwrap();
155155
// limit objectstore to a concurrent request limit
156156
let azure = LimitStore::new(azure, super::MAX_OBJECT_STORE_REQUESTS);
@@ -161,7 +161,7 @@ impl ObjectStorageProvider for AzureBlobConfig {
161161
.unwrap();
162162
object_store_registry.register_store(url.as_ref(), Arc::new(azure));
163163

164-
RuntimeConfig::new().with_object_store_registry(Arc::new(object_store_registry))
164+
RuntimeEnvBuilder::new().with_object_store_registry(Arc::new(object_store_registry))
165165
}
166166

167167
fn construct_client(&self) -> Arc<dyn super::ObjectStorage> {
@@ -191,6 +191,7 @@ pub fn to_object_store_path(path: &RelativePath) -> StorePath {
191191

192192
// ObjStoreClient is generic client to enable interactions with different cloudprovider's
193193
// object store such as S3 and Azure Blob
194+
#[derive(Debug)]
194195
pub struct BlobStore {
195196
client: LimitStore<MicrosoftAzure>,
196197
account: String,

src/storage/localfs.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use std::{
2525

2626
use async_trait::async_trait;
2727
use bytes::Bytes;
28-
use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeConfig};
28+
use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeEnvBuilder};
2929
use fs_extra::file::CopyOptions;
3030
use futures::{stream::FuturesUnordered, TryStreamExt};
3131
use relative_path::{RelativePath, RelativePathBuf};
@@ -64,8 +64,8 @@ pub struct FSConfig {
6464
}
6565

6666
impl ObjectStorageProvider for FSConfig {
67-
fn get_datafusion_runtime(&self) -> RuntimeConfig {
68-
RuntimeConfig::new()
67+
fn get_datafusion_runtime(&self) -> RuntimeEnvBuilder {
68+
RuntimeEnvBuilder::new()
6969
}
7070

7171
fn construct_client(&self) -> Arc<dyn ObjectStorage> {
@@ -81,6 +81,7 @@ impl ObjectStorageProvider for FSConfig {
8181
}
8282
}
8383

84+
#[derive(Debug)]
8485
pub struct LocalFS {
8586
// absolute path of the data directory
8687
root: PathBuf,

src/storage/object_storage.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,14 @@ use arrow_schema::Schema;
4646
use async_trait::async_trait;
4747
use bytes::Bytes;
4848
use chrono::Local;
49-
use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeConfig};
49+
use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeEnvBuilder};
5050
use once_cell::sync::OnceCell;
5151
use relative_path::RelativePath;
5252
use relative_path::RelativePathBuf;
5353
use tracing::error;
5454

5555
use std::collections::BTreeMap;
56+
use std::fmt::Debug;
5657
use std::num::NonZeroU32;
5758
use std::{
5859
collections::HashMap,
@@ -63,7 +64,7 @@ use std::{
6364
};
6465

6566
pub trait ObjectStorageProvider: StorageMetrics + std::fmt::Debug + Send + Sync {
66-
fn get_datafusion_runtime(&self) -> RuntimeConfig;
67+
fn get_datafusion_runtime(&self) -> RuntimeEnvBuilder;
6768
fn construct_client(&self) -> Arc<dyn ObjectStorage>;
6869
fn get_object_store(&self) -> Arc<dyn ObjectStorage> {
6970
static STORE: OnceCell<Arc<dyn ObjectStorage>> = OnceCell::new();
@@ -75,7 +76,7 @@ pub trait ObjectStorageProvider: StorageMetrics + std::fmt::Debug + Send + Sync
7576
}
7677

7778
#[async_trait]
78-
pub trait ObjectStorage: Send + Sync + 'static {
79+
pub trait ObjectStorage: Debug + Send + Sync + 'static {
7980
async fn get_object(&self, path: &RelativePath) -> Result<Bytes, ObjectStorageError>;
8081
// TODO: make the filter function optional as we may want to get all objects
8182
async fn get_objects(

src/storage/s3.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use datafusion::datasource::listing::ListingTableUrl;
2222
use datafusion::datasource::object_store::{
2323
DefaultObjectStoreRegistry, ObjectStoreRegistry, ObjectStoreUrl,
2424
};
25-
use datafusion::execution::runtime_env::RuntimeConfig;
25+
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
2626
use futures::stream::FuturesUnordered;
2727
use futures::{StreamExt, TryStreamExt};
2828
use object_store::aws::{AmazonS3, AmazonS3Builder, AmazonS3ConfigKey, Checksum};
@@ -285,7 +285,7 @@ impl S3Config {
285285
}
286286

287287
impl ObjectStorageProvider for S3Config {
288-
fn get_datafusion_runtime(&self) -> RuntimeConfig {
288+
fn get_datafusion_runtime(&self) -> RuntimeEnvBuilder {
289289
let s3 = self.get_default_builder().build().unwrap();
290290

291291
// limit objectstore to a concurrent request limit
@@ -296,7 +296,7 @@ impl ObjectStorageProvider for S3Config {
296296
let url = ObjectStoreUrl::parse(format!("s3://{}", &self.bucket_name)).unwrap();
297297
object_store_registry.register_store(url.as_ref(), Arc::new(s3));
298298

299-
RuntimeConfig::new().with_object_store_registry(Arc::new(object_store_registry))
299+
RuntimeEnvBuilder::new().with_object_store_registry(Arc::new(object_store_registry))
300300
}
301301

302302
fn construct_client(&self) -> Arc<dyn ObjectStorage> {
@@ -325,6 +325,7 @@ fn to_object_store_path(path: &RelativePath) -> StorePath {
325325
StorePath::from(path.as_str())
326326
}
327327

328+
#[derive(Debug)]
328329
pub struct S3 {
329330
client: LimitStore<AmazonS3>,
330331
bucket: String,

0 commit comments

Comments
 (0)