Skip to content

Commit e655062

Browse files
ldanilek authored and Convex, Inc. committed
support _id filters without typescript (#24298)
Support index filters on `_id` fields. TypeScript is not changed yet so that we can do a soft launch, testing in prod a bit before exposing it to customers. The tricky part is virtual tables: the `_id` strings use the virtual table number, but the underlying Intervals use the physical table number. Luckily, base32 preserves order, so the ordering is the same in both spaces. When the `_id` filter is a valid virtual ID, it's easy to map it to the corresponding physical ID. But what if the `_id` filter isn't a valid virtual ID? Then we need to find a string in the physical ID space that occupies the same place in the order. Added a bunch of proptests, so I think it's working.

GitOrigin-RevId: 6677664011760c805d32d87851c470c9d84876f9
1 parent 66a6cf8 commit e655062

File tree

8 files changed

+478
-10
lines changed

8 files changed

+478
-10
lines changed

crates/common/src/bootstrap_model/index/database_index/indexed_fields.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::{
22
collections::HashSet,
33
convert::TryFrom,
44
fmt::Display,
5+
iter,
56
ops::Deref,
67
};
78

@@ -44,6 +45,10 @@ impl IndexedFields {
4445
.expect("Invalid _creationTime field path");
4546
IndexedFields(vec![field_path].into())
4647
}
48+
49+
pub fn iter_with_id(&self) -> impl Iterator<Item = &FieldPath> {
50+
self.iter().chain(iter::once(&*ID_FIELD_PATH))
51+
}
4752
}
4853

4954
impl HeapSize for IndexedFields {

crates/common/src/query.rs

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,19 @@ use sha2::{
2525
};
2626
use value::{
2727
heap_size::HeapSize,
28+
id_v6::VirtualTableNumberMap,
2829
utils::display_sequence,
2930
val,
3031
ConvexObject,
3132
ConvexValue,
33+
DeveloperDocumentId,
3234
TableId,
3335
TableIdAndTableNumber,
3436
};
3537

3638
use crate::{
3739
bootstrap_model::index::database_index::IndexedFields,
40+
document::ID_FIELD_PATH,
3841
index::IndexKeyBytes,
3942
interval::{
4043
BinaryKey,
@@ -176,26 +179,36 @@ pub struct IndexRange {
176179
}
177180

178181
impl IndexRange {
179-
pub fn compile(self, indexed_fields: IndexedFields) -> anyhow::Result<Interval> {
182+
pub fn compile(
183+
self,
184+
indexed_fields: IndexedFields,
185+
virtual_table_number_map: Option<VirtualTableNumberMap>,
186+
) -> anyhow::Result<Interval> {
180187
let index_name = self.index_name.clone();
181188
let SplitIndexRange {
182189
equalities,
183190
inequality,
184-
} = self.split()?;
191+
} = self.split()?.map_values(|field, v| {
192+
if field == &*ID_FIELD_PATH {
193+
map_id_value_to_tablet(v, virtual_table_number_map)
194+
} else {
195+
Ok(v)
196+
}
197+
})?;
185198

186199
// Check that some permutation of the equality field paths + the (optional)
187200
// inequality field path is a prefix of the indexed paths.
188201
//
189202
// NB: `indexed_fields` does not include the implicit `_id` field at the end of
190203
// every index, so `iter_with_id()` is used below to append it, which lets
191204
// the user reference `_id` in an index expression.
192-
let index_rank: BTreeMap<_, _> = indexed_fields[..]
193-
.iter()
205+
let index_rank: BTreeMap<_, _> = indexed_fields
206+
.iter_with_id()
194207
.enumerate()
195208
.map(|(i, field_name)| (field_name, i))
196209
.collect();
197210
anyhow::ensure!(
198-
index_rank.len() == indexed_fields.len(),
211+
index_rank.len() == indexed_fields.iter_with_id().count(),
199212
"{index_name} has duplicate fields?"
200213
);
201214

@@ -237,7 +250,7 @@ impl IndexRange {
237250

238251
let query_fields = QueryFields(used_paths.clone());
239252

240-
let mut fields_iter = indexed_fields.iter();
253+
let mut fields_iter = indexed_fields.iter_with_id();
241254
for field_path in used_paths {
242255
let matching_field = fields_iter.next().ok_or_else(|| {
243256
invalid_index_range(&index_name, &indexed_fields, &query_fields, &field_path)
@@ -387,6 +400,49 @@ struct SplitIndexRange {
387400
inequality: Option<IndexInequality>,
388401
}
389402

403+
impl SplitIndexRange {
404+
pub fn map_values(
405+
self,
406+
f: impl Fn(&FieldPath, ConvexValue) -> anyhow::Result<ConvexValue>,
407+
) -> anyhow::Result<SplitIndexRange> {
408+
let equalities = self
409+
.equalities
410+
.into_iter()
411+
.map(|(field, value)| {
412+
let new_value = match value.0 {
413+
Some(value) => MaybeValue(Some(f(&field, value)?)),
414+
None => MaybeValue(None),
415+
};
416+
anyhow::Ok((field, new_value))
417+
})
418+
.try_collect()?;
419+
let inequality = self
420+
.inequality
421+
.map(|inequality| {
422+
let start = match inequality.start {
423+
Bound::Unbounded => Bound::Unbounded,
424+
Bound::Included(value) => Bound::Included(f(&inequality.field_path, value)?),
425+
Bound::Excluded(value) => Bound::Excluded(f(&inequality.field_path, value)?),
426+
};
427+
let end = match inequality.end {
428+
Bound::Unbounded => Bound::Unbounded,
429+
Bound::Included(value) => Bound::Included(f(&inequality.field_path, value)?),
430+
Bound::Excluded(value) => Bound::Excluded(f(&inequality.field_path, value)?),
431+
};
432+
anyhow::Ok(IndexInequality {
433+
field_path: inequality.field_path,
434+
start,
435+
end,
436+
})
437+
})
438+
.transpose()?;
439+
Ok(SplitIndexRange {
440+
equalities,
441+
inequality,
442+
})
443+
}
444+
}
445+
390446
struct IndexInequality {
391447
field_path: FieldPath,
392448
start: Bound<ConvexValue>,
@@ -403,6 +459,21 @@ impl Display for QueryFields {
403459
}
404460
}
405461

462+
fn map_id_value_to_tablet(
463+
value: ConvexValue,
464+
virtual_table_number_map: Option<VirtualTableNumberMap>,
465+
) -> anyhow::Result<ConvexValue> {
466+
let val = match (&value, virtual_table_number_map) {
467+
(ConvexValue::String(id), Some(virtual_table_number_map)) => {
468+
let mapped =
469+
DeveloperDocumentId::map_string_between_table_numbers(id, virtual_table_number_map);
470+
val!(mapped)
471+
},
472+
_ => value,
473+
};
474+
Ok(val)
475+
}
476+
406477
fn already_defined_bound_error(
407478
bound_type: &str,
408479
field_path: &FieldPath,

crates/common/src/types/index.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ use sync_types::identifier::{
1111
};
1212
use value::{
1313
heap_size::HeapSize,
14+
id_v6::VirtualTableNumberMap,
1415
FieldName,
1516
InternalId,
1617
ResolvedDocumentId,
1718
TableId,
1819
TableIdAndTableNumber,
1920
TableIdentifier,
21+
TableMapping,
2022
TableName,
23+
VirtualTableMapping,
2124
};
2225

2326
use crate::{
@@ -119,6 +122,35 @@ pub enum StableIndexName {
119122
Missing,
120123
}
121124

125+
impl StableIndexName {
126+
pub fn virtual_table_number_map(
127+
&self,
128+
table_mapping: &TableMapping,
129+
virtual_table_mapping: &VirtualTableMapping,
130+
) -> anyhow::Result<Option<VirtualTableNumberMap>> {
131+
match self {
132+
StableIndexName::Physical(index_name) => {
133+
let table_number =
134+
table_mapping.inject_table_number()(*index_name.table())?.table_number;
135+
Ok(Some(VirtualTableNumberMap {
136+
virtual_table_number: table_number,
137+
physical_table_number: table_number,
138+
}))
139+
},
140+
StableIndexName::Virtual(index_name, tablet_index_name) => {
141+
Ok(Some(VirtualTableNumberMap {
142+
virtual_table_number: virtual_table_mapping.number(index_name.table())?,
143+
physical_table_number: table_mapping.inject_table_number()(
144+
*tablet_index_name.table(),
145+
)?
146+
.table_number,
147+
}))
148+
},
149+
StableIndexName::Missing => Ok(None),
150+
}
151+
}
152+
}
153+
122154
impl HeapSize for TabletIndexName {
123155
fn heap_size(&self) -> usize {
124156
self.descriptor.heap_size()

crates/database/src/query/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,10 @@ impl<RT: Runtime, T: QueryType> CompiledQuery<RT, T> {
375375
)),
376376
QuerySource::IndexRange(index_range) => {
377377
let order = index_range.order;
378-
let interval = index_range.compile(indexed_fields)?;
378+
let virtual_table_mapping = tx.virtual_table_mapping().clone();
379+
let virtual_table_number_map = stable_index_name
380+
.virtual_table_number_map(tx.table_mapping(), &virtual_table_mapping)?;
381+
let interval = index_range.compile(indexed_fields, virtual_table_number_map)?;
379382
QueryNode::IndexRange(IndexRange::new(
380383
stable_index_name,
381384
index_name,

crates/isolate/src/tests/id_strings.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,20 @@ async fn test_system_normalize_id(rt: TestRuntime) -> anyhow::Result<()> {
196196
Ok(())
197197
}
198198

199+
#[convex_macro::test_runtime]
200+
async fn test_virtual_id_query(rt: TestRuntime) -> anyhow::Result<()> {
201+
let t = UdfTest::default(rt).await?;
202+
let scheduled_id = t.mutation("idStrings:schedule", assert_obj!()).await?;
203+
204+
t.query(
205+
"idStrings:queryVirtualId",
206+
assert_obj!("id" => scheduled_id),
207+
)
208+
.await?;
209+
210+
Ok(())
211+
}
212+
199213
proptest! {
200214
#![proptest_config(ProptestConfig { cases: 32 * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), failure_persistence: None, .. ProptestConfig::default() })]
201215

crates/value/src/base32.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::cmp;
2+
13
// Forked from https://github.com/andreasots/base32 @ 58909ac.
24
//
35
// Copyright (c) 2015 The base32 Developers - MIT License
@@ -121,6 +123,72 @@ pub fn decode(data: &str) -> Result<Vec<u8>, InvalidBase32Error> {
121123
Ok(out)
122124
}
123125

126+
/// Maps a single character onto the lowercase base32 alphabet used here
/// (`0-9` and `a-z` without `i`, `l`, `o`, `u`).
///
/// Characters already in the alphabet are returned unchanged. Everything else
/// is moved to a neighbouring alphabet character:
/// * anything below `'0'` rounds up to `'0'`,
/// * anything between `'9'` and `'a'` (exclusive) rounds up to `'a'`,
/// * the excluded letters `i`, `l`, `o`, `u` round up to the next letter,
/// * anything above `'z'` rounds down to `'z'`.
fn clamp_char_to_alphabet(c: char) -> char {
    match c {
        // Excluded letters: round up to the following alphabet character.
        'i' => 'j',
        'l' => 'm',
        'o' => 'p',
        'u' => 'v',
        // Already part of the alphabet.
        '0'..='9' | 'a'..='z' => c,
        // Below the alphabet, or in the gap between digits and letters.
        '\0'..='/' => '0',
        ':'..='`' => 'a',
        // Above the alphabet: the only case that rounds down.
        '{'..=char::MAX => 'z',
    }
}

/// Returns a string that can be base32 decoded, and is the closest such string
/// to the input string in lexicographic order.
/// i.e. for every `t` which is valid base32 of length <= `target_len`,
/// if s = t, then clamp_to_alphabet(s) = t,
/// if s < t, then clamp_to_alphabet(s) <= t.
/// if s > t, then clamp_to_alphabet(s) >= t.
///
/// How does it work?
/// Each character is clamped to the closest character in the base32 alphabet.
/// If a character has to be rounded up, following characters all become '0'.
/// If a character has to be rounded down, following characters all become 'z'.
/// The result is then padded until it is at least `target_len` long and a
/// multiple of 5 characters, so no characters are dropped when decoding. The
/// padding character is 'z' if a character was rounded down and '0'
/// otherwise.
///
/// e.g. "azi" is between "azhzz" and "azj00", so it is clamped to "azj00".
/// e.g. "abcd~" is between "abcdz" and "abce0", so it is clamped to "abcdz".
pub fn clamp_to_alphabet(s: &str, target_len: usize) -> String {
    let mut out = String::with_capacity(s.len());
    // How the input compares to the output so far: `Less` once a character
    // has been rounded up, `Greater` once one has been rounded down.
    let mut order = cmp::Ordering::Equal;
    for c in s.chars() {
        match order {
            cmp::Ordering::Equal => {
                let clamped = clamp_char_to_alphabet(c);
                out.push(clamped);
                order = c.cmp(&clamped);
            },
            // Already rounded up: use the smallest possible suffix.
            cmp::Ordering::Less => out.push('0'),
            // Already rounded down: use the largest possible suffix.
            cmp::Ordering::Greater => out.push('z'),
        }
    }

    let filler = match order {
        cmp::Ordering::Equal | cmp::Ordering::Less => '0',
        cmp::Ordering::Greater => 'z',
    };
    // Every pushed character is ASCII, so the byte length equals the
    // character count. Grow to at least `target_len`, rounded up to the next
    // multiple of 5.
    let min_len = out.len().max(target_len);
    let padded_len = min_len + (5 - min_len % 5) % 5;
    let missing = padded_len - out.len();
    out.extend(std::iter::repeat(filler).take(missing));
    out
}
191+
124192
#[cfg(test)]
125193
mod tests {
126194
use proptest::prelude::*;

0 commit comments

Comments
 (0)