Skip to content

Commit f6f11fb

Browse files
authored
Merge pull request #103 from constellation-rs/join
Add left & inner join and limited sort
2 parents e118942 + b6d9119 commit f6f11fb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+5014
-124
lines changed

Cargo.toml

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
[package]
44
name = "amadeus"
5-
version = "0.3.7"
5+
version = "0.4.0"
66
license = "Apache-2.0"
77
authors = ["Alec Mocatta <alec@mocatta.net>"]
88
categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"]
@@ -12,8 +12,8 @@ Harmonious distributed data processing & analysis in Rust.
1212
1313
parquet postgres aws s3 cloudfront elb json csv logs hadoop hdfs arrow common crawl
1414
"""
15-
repository = "https://github.com/alecmocatta/amadeus"
16-
homepage = "https://github.com/alecmocatta/amadeus"
15+
repository = "https://github.com/constellation-rs/amadeus"
16+
homepage = "https://github.com/constellation-rs/amadeus"
1717
documentation = "https://docs.rs/amadeus"
1818
readme = "README.md"
1919
edition = "2018"
@@ -36,18 +36,20 @@ bench = ["serde-csv", "once_cell", "arrow-parquet", "rayon"]
3636
features = ["constellation", "aws", "commoncrawl", "parquet", "postgres", "csv", "json"]
3737

3838
[dependencies]
39-
amadeus-core = { version = "=0.3.7", path = "amadeus-core" }
40-
amadeus-derive = { version = "=0.3.7", path = "amadeus-derive" }
41-
amadeus-types = { version = "=0.3.7", path = "amadeus-types" }
42-
amadeus-aws = { version = "=0.3.7", path = "amadeus-aws", optional = true }
43-
amadeus-commoncrawl = { version = "=0.3.7", path = "amadeus-commoncrawl", optional = true }
44-
amadeus-parquet = { version = "=0.3.7", path = "amadeus-parquet", optional = true }
45-
amadeus-postgres = { version = "=0.3.7", path = "amadeus-postgres", optional = true }
46-
amadeus-serde = { version = "=0.3.7", path = "amadeus-serde", optional = true }
39+
amadeus-core = { version = "=0.4.0", path = "amadeus-core" }
40+
amadeus-derive = { version = "=0.4.0", path = "amadeus-derive" }
41+
amadeus-types = { version = "=0.4.0", path = "amadeus-types" }
42+
amadeus-aws = { version = "=0.4.0", path = "amadeus-aws", optional = true }
43+
amadeus-commoncrawl = { version = "=0.4.0", path = "amadeus-commoncrawl", optional = true }
44+
amadeus-parquet = { version = "=0.4.0", path = "amadeus-parquet", optional = true }
45+
amadeus-postgres = { version = "=0.4.0", path = "amadeus-postgres", optional = true }
46+
amadeus-serde = { version = "=0.4.0", path = "amadeus-serde", optional = true }
47+
amadeus-streaming = { version = "=0.4.0", path = "amadeus-streaming" }
4748
async-channel = "1.1"
4849
bincode = { version = "1.3", optional = true }
4950
constellation-rs = { version = "0.2.0-alpha.2", default-features = false, optional = true }
5051
derive-new = "0.5"
52+
event-listener = "=2.3.1" # https://github.com/stjepang/event-listener/issues/9
5153
futures = "0.3"
5254
num_cpus = "1.13"
5355
pin-project = "0.4"
@@ -72,7 +74,6 @@ doc-comment = "0.3"
7274
either = { version = "1.5", features = ["serde"] }
7375
rand = "0.7"
7476
serde_json = "1.0"
75-
streaming_algorithms = "0.3"
7677
tokio = { version = "0.2", features = ["macros", "time"] }
7778

7879
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
@@ -81,6 +82,13 @@ wasm-bindgen-test = "0.3"
8182
[build-dependencies]
8283
rustversion = "1.0"
8384

85+
[profile.bench]
86+
codegen-units = 1
87+
debug = 2
88+
incremental = false
89+
lto = true
90+
# panic = "abort" # this is disallowed by cargo currently
91+
8492
[[example]]
8593
name = "cloudfront_logs"
8694
required-features = ["aws"]

amadeus-aws/Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
[package]
22
name = "amadeus-aws"
3-
version = "0.3.7"
3+
version = "0.4.0"
44
license = "Apache-2.0"
55
authors = ["Alec Mocatta <alec@mocatta.net>"]
66
categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"]
77
keywords = ["amadeus", "data", "aws", "s3", "logs"]
88
description = """
99
Harmonious distributed data analysis in Rust.
1010
"""
11-
repository = "https://github.com/alecmocatta/amadeus"
12-
homepage = "https://github.com/alecmocatta/amadeus"
11+
repository = "https://github.com/constellation-rs/amadeus"
12+
homepage = "https://github.com/constellation-rs/amadeus"
1313
documentation = "https://docs.rs/amadeus"
1414
readme = "README.md"
1515
edition = "2018"
@@ -19,8 +19,8 @@ azure-devops = { project = "alecmocatta/amadeus", pipeline = "tests", build = "2
1919
maintenance = { status = "actively-developed" }
2020

2121
[dependencies]
22-
amadeus-core = { version = "=0.3.7", path = "../amadeus-core" }
23-
amadeus-types = { version = "=0.3.7", path = "../amadeus-types" }
22+
amadeus-core = { version = "=0.4.0", path = "../amadeus-core" }
23+
amadeus-types = { version = "=0.4.0", path = "../amadeus-types" }
2424
async-compression = { version = "0.3.3", features = ["gzip", "futures-bufread"] }
2525
async-trait = "0.1"
2626
chrono = { version = "0.4", default-features = false }

amadeus-aws/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# amadeus-aws
22

3-
This subcrate of the [`amadeus`](https://github.com/alecmocatta/amadeus) project includes a filesystem backend for S3 and a source for AWS Cloudfront logs.
3+
This subcrate of the [`amadeus`](https://github.com/constellation-rs/amadeus) project includes a filesystem backend for S3 and a source for AWS Cloudfront logs.

amadeus-aws/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//!
77
//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. These types are re-exposed in [`amadeus::source`](https://docs.rs/amadeus/0.3/amadeus/source/index.html).
88
9-
#![doc(html_root_url = "https://docs.rs/amadeus-aws/0.3.7")]
9+
#![doc(html_root_url = "https://docs.rs/amadeus-aws/0.4.0")]
1010
#![cfg_attr(nightly, feature(type_alias_impl_trait))]
1111
#![warn(
1212
// missing_copy_implementations,

amadeus-commoncrawl/Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
[package]
22
name = "amadeus-commoncrawl"
3-
version = "0.3.7"
3+
version = "0.4.0"
44
license = "MIT OR Apache-2.0"
55
authors = ["Stephen Becker IV <github@deathbyescalator.com>", "Alec Mocatta <alec@mocatta.net>"]
66
categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"]
77
keywords = ["amadeus", "data", "commoncrawl", "web", "crawl"]
88
description = """
99
Harmonious distributed data analysis in Rust.
1010
"""
11-
repository = "https://github.com/alecmocatta/amadeus"
12-
homepage = "https://github.com/alecmocatta/amadeus"
11+
repository = "https://github.com/constellation-rs/amadeus"
12+
homepage = "https://github.com/constellation-rs/amadeus"
1313
documentation = "https://docs.rs/amadeus"
1414
readme = "README.md"
1515
edition = "2018"
@@ -19,8 +19,8 @@ azure-devops = { project = "alecmocatta/amadeus", pipeline = "tests", build = "2
1919
maintenance = { status = "actively-developed" }
2020

2121
[dependencies]
22-
amadeus-core = { version = "=0.3.7", path = "../amadeus-core" }
23-
amadeus-types = { version = "=0.3.7", path = "../amadeus-types" }
22+
amadeus-core = { version = "=0.4.0", path = "../amadeus-core" }
23+
amadeus-types = { version = "=0.4.0", path = "../amadeus-types" }
2424
async-compression = { version = "0.3.3", features = ["gzip", "futures-bufread"] }
2525
futures = "0.3"
2626
nom = "4.2.3"

amadeus-commoncrawl/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# amadeus-commoncrawl
22

3-
This subcrate of the [`amadeus`](https://github.com/alecmocatta/amadeus) project includes a source for the CommonCrawl datasets.
3+
This subcrate of the [`amadeus`](https://github.com/constellation-rs/amadeus) project includes a source for the CommonCrawl datasets.

amadeus-commoncrawl/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//!
77
//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. These types are re-exposed in [`amadeus::source`](https://docs.rs/amadeus/0.3/amadeus/source/index.html).
88
9-
#![doc(html_root_url = "https://docs.rs/amadeus-commoncrawl/0.3.7")]
9+
#![doc(html_root_url = "https://docs.rs/amadeus-commoncrawl/0.4.0")]
1010
#![cfg_attr(nightly, feature(type_alias_impl_trait))]
1111
#![warn(
1212
// missing_copy_implementations,

amadeus-core/Cargo.toml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
[package]
22
name = "amadeus-core"
3-
version = "0.3.7"
3+
version = "0.4.0"
44
license = "Apache-2.0"
55
authors = ["Alec Mocatta <alec@mocatta.net>"]
66
categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"]
77
keywords = ["amadeus", "distributed", "data-science", "data", "logs"]
88
description = """
99
Harmonious distributed data analysis in Rust.
1010
"""
11-
repository = "https://github.com/alecmocatta/amadeus"
12-
homepage = "https://github.com/alecmocatta/amadeus"
11+
repository = "https://github.com/constellation-rs/amadeus"
12+
homepage = "https://github.com/constellation-rs/amadeus"
1313
documentation = "https://docs.rs/amadeus"
1414
readme = "README.md"
1515
edition = "2018"
@@ -19,20 +19,21 @@ azure-devops = { project = "alecmocatta/amadeus", pipeline = "tests", build = "2
1919
maintenance = { status = "actively-developed" }
2020

2121
[dependencies]
22+
amadeus-streaming = { version = "=0.4.0", path = "../amadeus-streaming" }
2223
async-trait = "0.1"
2324
derive-new = "0.5"
2425
educe = "0.4"
2526
either = { version = "1.5", features = ["serde"] }
2627
futures = "0.3"
2728
indexmap = { version = "1.5", features = ["serde-1"] }
2829
itertools = "0.9"
30+
multimap = "0.8"
2931
owned_chars = "0.3"
3032
pin-project = "0.4"
3133
rand = "0.7"
3234
replace_with = "0.1"
3335
serde = { version = "1.0", features = ["derive"] }
3436
serde_closure = "0.3"
35-
streaming_algorithms = "0.3"
3637
sum = { version = "0.1", features = ["futures", "serde"] }
3738
tokio = { version = "0.2", features = ["blocking", "rt-core"] }
3839
walkdir = "2.2"

amadeus-core/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# amadeus-core
22

3-
This subcrate of the [`amadeus`](https://github.com/alecmocatta/amadeus) project includes fundamental definitions including `DistributedIterator` and `ProcessPool`.
3+
This subcrate of the [`amadeus`](https://github.com/constellation-rs/amadeus) project includes fundamental definitions including `DistributedIterator` and `ProcessPool`.

amadeus-core/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//!
77
//! This is a support crate of [Amadeus](https://github.com/constellation-rs/amadeus) and is not intended to be used directly. All functionality is re-exposed in [`amadeus`](https://docs.rs/amadeus/0.3/amadeus/).
88
9-
#![doc(html_root_url = "https://docs.rs/amadeus-core/0.3.7")]
9+
#![doc(html_root_url = "https://docs.rs/amadeus-core/0.4.0")]
1010
#![cfg_attr(nightly, feature(unboxed_closures))]
1111
#![recursion_limit = "25600"]
1212
#![warn(

0 commit comments

Comments
 (0)