Skip to content

Commit dfaed0b

Browse files
committed
Add composite aggregation
Uses the latest composite PR in Tantivy
1 parent e71f49e commit dfaed0b

File tree

4 files changed

+183
-11
lines changed

4 files changed

+183
-11
lines changed

quickwit/Cargo.lock

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ quickwit-serve = { path = "quickwit-serve" }
346346
quickwit-storage = { path = "quickwit-storage" }
347347
quickwit-telemetry = { path = "quickwit-telemetry" }
348348

349-
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "dabcaa5", default-features = false, features = [
349+
tantivy = { git = "https://github.com/SekoiaLab/tantivy/", rev = "bbdf83e", default-features = false, features = [
350350
"lz4-compression",
351351
"mmap",
352352
"quickwit",

quickwit/quickwit-query/src/aggregations.rs

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ use tantivy::aggregation::Key as TantivyKey;
1818
use tantivy::aggregation::agg_result::{
1919
AggregationResult as TantivyAggregationResult, AggregationResults as TantivyAggregationResults,
2020
BucketEntries as TantivyBucketEntries, BucketEntry as TantivyBucketEntry,
21-
BucketResult as TantivyBucketResult, MetricResult as TantivyMetricResult,
21+
BucketResult as TantivyBucketResult, CompositeBucketEntry as TantivyCompositeBucketEntry,
22+
CompositeKey as TantivyCompositeKey, MetricResult as TantivyMetricResult,
2223
RangeBucketEntry as TantivyRangeBucketEntry,
2324
};
2425
use tantivy::aggregation::metric::{
@@ -169,6 +170,13 @@ pub enum BucketResult {
169170
/// The upper bound error for the doc count of each term.
170171
doc_count_error_upper_bound: Option<u64>,
171172
},
173+
/// This is the composite aggregation result
174+
Composite {
175+
/// The buckets
176+
buckets: Vec<CompositeBucketEntry>,
177+
/// The key to start after when paginating
178+
after_key: FxHashMap<String, CompositeKey>,
179+
},
172180
}
173181

174182
impl From<TantivyBucketResult> for BucketResult {
@@ -189,6 +197,10 @@ impl From<TantivyBucketResult> for BucketResult {
189197
sum_other_doc_count,
190198
doc_count_error_upper_bound,
191199
},
200+
TantivyBucketResult::Composite { buckets, after_key } => BucketResult::Composite {
201+
buckets: buckets.into_iter().map(Into::into).collect(),
202+
after_key: after_key.into_iter().map(|(k, v)| (k, v.into())).collect(),
203+
},
192204
}
193205
}
194206
}
@@ -211,6 +223,10 @@ impl From<BucketResult> for TantivyBucketResult {
211223
sum_other_doc_count,
212224
doc_count_error_upper_bound,
213225
},
226+
BucketResult::Composite { buckets, after_key } => TantivyBucketResult::Composite {
227+
buckets: buckets.into_iter().map(Into::into).collect(),
228+
after_key: after_key.into_iter().map(|(k, v)| (k, v.into())).collect(),
229+
},
214230
}
215231
}
216232
}
@@ -410,3 +426,75 @@ impl From<PercentilesMetricResult> for TantivyPercentilesMetricResult {
410426
TantivyPercentilesMetricResult { values }
411427
}
412428
}
429+
430+
#[derive(Clone, Debug, Serialize, Deserialize)]
431+
pub enum CompositeKey {
432+
/// Boolean key
433+
Bool(bool),
434+
/// String key
435+
Str(String),
436+
/// `i64` key
437+
I64(i64),
438+
/// `u64` key
439+
U64(u64),
440+
/// `f64` key
441+
F64(f64),
442+
/// Null key
443+
Null,
444+
}
445+
446+
#[derive(Clone, Debug, Serialize, Deserialize)]
447+
pub struct CompositeBucketEntry {
448+
/// The identifier of the bucket.
449+
pub key: FxHashMap<String, CompositeKey>,
450+
/// Number of documents in the bucket.
451+
pub doc_count: u64,
452+
/// Sub-aggregations in this bucket.
453+
pub sub_aggregation: AggregationResults,
454+
}
455+
456+
impl From<TantivyCompositeKey> for CompositeKey {
457+
fn from(value: TantivyCompositeKey) -> CompositeKey {
458+
match value {
459+
TantivyCompositeKey::Bool(b) => CompositeKey::Bool(b),
460+
TantivyCompositeKey::Str(s) => CompositeKey::Str(s),
461+
TantivyCompositeKey::I64(i) => CompositeKey::I64(i),
462+
TantivyCompositeKey::U64(u) => CompositeKey::U64(u),
463+
TantivyCompositeKey::F64(f) => CompositeKey::F64(f),
464+
TantivyCompositeKey::Null => CompositeKey::Null,
465+
}
466+
}
467+
}
468+
469+
impl From<CompositeKey> for TantivyCompositeKey {
470+
fn from(value: CompositeKey) -> TantivyCompositeKey {
471+
match value {
472+
CompositeKey::Bool(b) => TantivyCompositeKey::Bool(b),
473+
CompositeKey::Str(s) => TantivyCompositeKey::Str(s),
474+
CompositeKey::I64(i) => TantivyCompositeKey::I64(i),
475+
CompositeKey::U64(u) => TantivyCompositeKey::U64(u),
476+
CompositeKey::F64(f) => TantivyCompositeKey::F64(f),
477+
CompositeKey::Null => TantivyCompositeKey::Null,
478+
}
479+
}
480+
}
481+
482+
impl From<TantivyCompositeBucketEntry> for CompositeBucketEntry {
483+
fn from(value: TantivyCompositeBucketEntry) -> CompositeBucketEntry {
484+
CompositeBucketEntry {
485+
key: value.key.into_iter().map(|(k, v)| (k, v.into())).collect(),
486+
doc_count: value.doc_count,
487+
sub_aggregation: value.sub_aggregation.into(),
488+
}
489+
}
490+
}
491+
492+
impl From<CompositeBucketEntry> for TantivyCompositeBucketEntry {
493+
fn from(value: CompositeBucketEntry) -> TantivyCompositeBucketEntry {
494+
TantivyCompositeBucketEntry {
495+
key: value.key.into_iter().map(|(k, v)| (k, v.into())).collect(),
496+
doc_count: value.doc_count,
497+
sub_aggregation: value.sub_aggregation.into(),
498+
}
499+
}
500+
}

quickwit/rest-api-tests/scenarii/aggregations/0001-aggregations.yaml

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ expected:
375375
aggregations:
376376
response_stats:
377377
sum_of_squares: 55300.0
378+
---
378379
# Test term aggs number precision
379380
method: [GET]
380381
engines:
@@ -393,3 +394,86 @@ expected:
393394
buckets:
394395
- doc_count: 1
395396
key: 1769070189829214200
397+
---
398+
# Test composite aggregation
399+
method: [GET]
400+
engines:
401+
- quickwit
402+
endpoint: _elastic/aggregations/_search
403+
json:
404+
size: 0
405+
aggs:
406+
host_name_composite:
407+
composite:
408+
size: 5
409+
sources:
410+
- host:
411+
terms:
412+
field: "host"
413+
missing_bucket: true
414+
- name:
415+
terms:
416+
field: "name"
417+
- response:
418+
histogram:
419+
field: "response"
420+
interval: 50
421+
expected:
422+
aggregations:
423+
host_name_composite:
424+
buckets:
425+
- key: { "host": null, "name": "Bernhard", "response": 100.0 }
426+
doc_count: 1
427+
- key: { "host": null, "name": "Fritz", "response": 0.0 }
428+
doc_count: 2
429+
- key: { "host": "192.168.0.1", "name": "Fred", "response": 100.0 }
430+
doc_count: 1
431+
- key: { "host": "192.168.0.1", "name": "Fritz", "response": 0.0 }
432+
doc_count: 1
433+
- key: { "host": "192.168.0.10", "name": "Albert", "response": 100.0 }
434+
doc_count: 1
435+
after_key:
436+
host: "192.168.0.10"
437+
name: "Albert"
438+
response: 100.0
439+
440+
---
441+
# Test composite aggregation paging
442+
method: [GET]
443+
engines:
444+
- quickwit
445+
endpoint: _elastic/aggregations/_search
446+
json:
447+
size: 0
448+
aggs:
449+
host_name_composite:
450+
composite:
451+
size: 5
452+
sources:
453+
- host:
454+
terms:
455+
field: "host"
456+
missing_bucket: true
457+
- name:
458+
terms:
459+
field: "name"
460+
- response:
461+
histogram:
462+
field: "response"
463+
interval: 50
464+
after:
465+
host: "192.168.0.10"
466+
name: "Albert"
467+
response: 100.0
468+
expected:
469+
aggregations:
470+
host_name_composite:
471+
buckets:
472+
- key: { "host": "192.168.0.10", "name": "Holger", "response": 0.0 }
473+
doc_count: 1
474+
# Horst is missing because his response field is missing
475+
- key: { "host": "192.168.0.10", "name": "Werner", "response": 0.0 }
476+
doc_count: 1
477+
- key: { "host": "192.168.0.11", "name": "Manfred", "response": 100.0 }
478+
doc_count: 1
479+
---

0 commit comments

Comments
 (0)