Skip to content

Commit f6a3075

Browse files
committed
Compare strings lexicographically for column statistics
CubeStore uses the lexicographical order everywhere and requires the exact same order in statistics. This also follows the Parquet specification.
1 parent 0293429 commit f6a3075

File tree

1 file changed

+8
-18
lines changed

1 file changed

+8
-18
lines changed

parquet/src/data_type.rs

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -116,23 +116,13 @@ pub struct ByteArray {
116116

117117
impl PartialOrd for ByteArray {
118118
fn partial_cmp(&self, other: &ByteArray) -> Option<Ordering> {
119-
if self.data.is_some() && other.data.is_some() {
120-
match self.len().cmp(&other.len()) {
121-
Ordering::Greater => Some(Ordering::Greater),
122-
Ordering::Less => Some(Ordering::Less),
123-
Ordering::Equal => {
124-
for (v1, v2) in self.data().iter().zip(other.data().iter()) {
125-
match v1.cmp(v2) {
126-
Ordering::Greater => return Some(Ordering::Greater),
127-
Ordering::Less => return Some(Ordering::Less),
128-
_ => {}
129-
}
130-
}
131-
Some(Ordering::Equal)
132-
}
133-
}
134-
} else {
135-
None
119+
// Changed in CubeStore fork: we want to compare lexicographically (we store strings here).
120+
// Original code uses other comparisons.
121+
match (&self.data, &other.data) {
122+
(None, None) => Some(Ordering::Equal),
123+
(None, Some(_)) => Some(Ordering::Less),
124+
(Some(_), None) => Some(Ordering::Greater),
125+
(Some(a), Some(b)) => Some(a.data().cmp(b.data())),
136126
}
137127
}
138128
}
@@ -1356,7 +1346,7 @@ mod tests {
13561346
let ba4 = ByteArray::from(vec![]);
13571347
let ba5 = ByteArray::from(vec![2, 2, 3]);
13581348

1359-
assert!(ba1 > ba2);
1349+
assert!(ba2 > ba1);
13601350
assert!(ba3 > ba1);
13611351
assert!(ba1 > ba4);
13621352
assert_eq!(ba1, ba11);

0 commit comments

Comments
 (0)