
Commit da4a247

Lorak-mm and kwprzytula committed
Metadata: Verify that there are no gaps in pk and ck
Previously it was possible for some positions of `partition_key` and `clustering_key` to remain unfilled and thus be left as empty strings. The probability was very low (Scylla would have to return very strange data), but the possibility was there. This commit verifies that no such gaps occur and returns an error if they do.

Co-authored-by: Wojciech Przytuła <[email protected]>
1 parent cdf7f84 commit da4a247
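The fix works by sorting each key's `(position, column_name)` pairs and requiring that the positions form the contiguous range `0..n`. The sketch below mirrors the `validate_key_columns` helper added in the diff further down, but as a self-contained illustration: the `check_contiguous` name and the `main` driver are made up for this example and are not part of the driver's API.

```rust
// Standalone illustration of the gap check: sort the (position, column_name)
// pairs, then require that the positions match the contiguous range 0..n,
// reporting the first index at which they diverge.
fn check_contiguous(mut key_columns: Vec<(i32, String)>) -> Result<Vec<String>, i32> {
    // Metadata rows may arrive in any order, so sort by the declared position first.
    key_columns.sort_unstable_by_key(|(position, _)| *position);

    key_columns
        .into_iter()
        .enumerate()
        .map(|(idx, (position, column_name))| {
            // Keys with more than i32::MAX columns are not a realistic case,
            // so the conversion is assumed to succeed.
            let idx: i32 = idx.try_into().unwrap();
            if idx == position {
                Ok(column_name)
            } else {
                // Any hole or duplicate makes the sorted positions diverge from
                // 0..n; report the first index where they differ.
                Err(idx)
            }
        })
        .collect()
}

fn main() {
    // Complete key: positions 0 and 1 are both present, in any input order.
    assert_eq!(
        check_contiguous(vec![(1, "ck".to_string()), (0, "pk".to_string())]),
        Ok(vec!["pk".to_string(), "ck".to_string()])
    );

    // Gap: position 1 is missing, so the check reports index 1.
    assert_eq!(
        check_contiguous(vec![(0, "pk".to_string()), (2, "ck".to_string())]),
        Err(1)
    );
}
```

Sorting first makes the check independent of the order in which the schema rows are returned; comparing each sorted position against its index then rejects both missing and duplicated positions, and in the commit the offending index becomes the payload of the new `IncompletePartitionKey` / `IncompleteClusteringKey` error variants.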

File tree

1 file changed: +48 -8 lines changed


scylla/src/cluster/metadata.rs

Lines changed: 48 additions & 8 deletions
```diff
@@ -77,6 +77,10 @@ type PerKsTableResult<T, E> = PerKsTable<Result<T, E>>;
 pub(crate) enum SingleKeyspaceMetadataError {
     #[error(transparent)]
     MissingUDT(MissingUserDefinedType),
+    #[error("Partition key column with position {0} is missing from metadata")]
+    IncompletePartitionKey(i32),
+    #[error("Clustering key column with position {0} is missing from metadata")]
+    IncompleteClusteringKey(i32),
 }
 
 /// Allows to read current metadata from the cluster
@@ -1606,7 +1610,7 @@ async fn query_tables_schema(
     let mut all_partitioners = query_table_partitioners(conn).await?;
     let mut result = HashMap::new();
 
-    for ((keyspace_name, table_name), table_result) in tables_schema {
+    'tables_loop: for ((keyspace_name, table_name), table_result) in tables_schema {
         let keyspace_and_table_name = (keyspace_name, table_name);
 
         #[allow(clippy::type_complexity)]
@@ -1621,16 +1625,52 @@ async fn query_tables_schema(
                 continue;
             }
         };
-        let mut partition_key = vec!["".to_string(); partition_key_columns.len()];
-        for (position, column_name) in partition_key_columns {
-            partition_key[position as usize] = column_name;
-        }
 
-        let mut clustering_key = vec!["".to_string(); clustering_key_columns.len()];
-        for (position, column_name) in clustering_key_columns {
-            clustering_key[position as usize] = column_name;
+        fn validate_key_columns(mut key_columns: Vec<(i32, String)>) -> Result<Vec<String>, i32> {
+            key_columns.sort_unstable_by_key(|(position, _)| *position);
+
+            key_columns
+                .into_iter()
+                .enumerate()
+                .map(|(idx, (position, column_name))| {
+                    // unwrap: I don't see the point of handling the scenario of fetching over
+                    // 2 * 10^9 columns.
+                    let idx: i32 = idx.try_into().unwrap();
+                    if idx == position {
+                        Ok(column_name)
+                    } else {
+                        Err(idx)
+                    }
+                })
+                .collect::<Result<Vec<_>, _>>()
         }
 
+        let partition_key = match validate_key_columns(partition_key_columns) {
+            Ok(partition_key_columns) => partition_key_columns,
+            Err(position) => {
+                result.insert(
+                    keyspace_and_table_name,
+                    Err(SingleKeyspaceMetadataError::IncompletePartitionKey(
+                        position,
+                    )),
+                );
+                continue 'tables_loop;
+            }
+        };
+
+        let clustering_key = match validate_key_columns(clustering_key_columns) {
+            Ok(clustering_key_columns) => clustering_key_columns,
+            Err(position) => {
+                result.insert(
+                    keyspace_and_table_name,
+                    Err(SingleKeyspaceMetadataError::IncompleteClusteringKey(
+                        position,
+                    )),
+                );
+                continue 'tables_loop;
+            }
+        };
+
         let partitioner = all_partitioners
             .remove(&keyspace_and_table_name)
             .unwrap_or_default();
```
