Merged
Changes from 12 commits
8 changes: 8 additions & 0 deletions .github/workflows/rust.yml
@@ -781,3 +781,11 @@ jobs:
- name: Check datafusion-proto
working-directory: datafusion/proto
run: cargo msrv --output-format json --log-target stdout verify
typos:
name: Spell Check with Typos
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
Member Author

Pinned to a fixed revision to prevent it from breaking unexpectedly because of a dictionary update. It uses an unversioned new commit.

Member

if "unversioned new commit." is something not released yet, then let's please use a tagged version

See also #17046 (comment)
I believe there is no good reason to pin precise commits for GitHub's own internal actions such as actions/checkout.

with:
persist-credentials: false
- uses: crate-ci/typos@master
Member

Is this a 3rd party action maintained at https://github.com/crate-ci/typos?
Is it already ASF approved?
If yes, it must be pinned to a particular commit hash.
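
For reference, a minimal sketch of the pinning convention being discussed; the commit SHA and release tag shown for crate-ci/typos are placeholders, not real values:

    steps:
      # First-party GitHub action: a tagged version is arguably sufficient here
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      # Third-party action: pin to a full commit SHA and record the matching release tag in a comment
      - uses: crate-ci/typos@0123456789abcdef0123456789abcdef01234567 # v1.x.y (placeholder)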

Member

Cool file! Is there an ASF-maintained workflow or action to validate our workflows using the approved_patterns file?

Member Author

I'm not very sure about this part... actually I doubt whether this is enforced 🙈

2 changes: 1 addition & 1 deletion benchmarks/src/bin/external_aggr.rs
@@ -113,7 +113,7 @@ impl ExternalAggrConfig {
"#,
];

/// If `--query` and `--memory-limit` is not speicified, run all queries
/// If `--query` and `--memory-limit` is not specified, run all queries
/// with pre-configured memory limits
/// If only `--query` is specified, run the query with all memory limits
/// for this query
2 changes: 1 addition & 1 deletion datafusion-cli/tests/cli_integration.rs
@@ -64,7 +64,7 @@ async fn setup_minio_container() -> ContainerAsync<minio::MinIO> {

match container {
Ok(container) => {
// We wait for MinIO to be healthy and preprare test files. We do it via CLI to avoid s3 dependency
// We wait for MinIO to be healthy and prepare test files. We do it via CLI to avoid s3 dependency
let commands = [
ExecCommand::new(["/usr/bin/mc", "ready", "local"]),
ExecCommand::new([
2 changes: 1 addition & 1 deletion datafusion-examples/examples/custom_file_casts.rs
@@ -41,7 +41,7 @@ use object_store::path::Path;
use object_store::{ObjectStore, PutPayload};

// Example showing how to implement custom casting rules to adapt file schemas.
// This example enforces that casts must be stricly widening: if the file type is Int64 and the table type is Int32, it will error
// This example enforces that casts must be strictly widening: if the file type is Int64 and the table type is Int32, it will error
// before even reading the data.
// Without this custom cast rule DataFusion would happily do the narrowing cast, potentially erroring only if it found a row with data it could not cast.

4 changes: 2 additions & 2 deletions datafusion-examples/examples/expr_api.rs
@@ -85,7 +85,7 @@ async fn main() -> Result<()> {
boundary_analysis_and_selectivity_demo()?;

// See how boundary analysis works for `AND` & `OR` conjunctions.
boundary_analysis_in_conjuctions_demo()?;
boundary_analysis_in_conjunctions_demo()?;

// See how to determine the data types of expressions
expression_type_demo()?;
@@ -351,7 +351,7 @@ fn boundary_analysis_and_selectivity_demo() -> Result<()> {

/// This function shows how to think about and leverage the analysis API
/// to infer boundaries in `AND` & `OR` conjunctions.
fn boundary_analysis_in_conjuctions_demo() -> Result<()> {
fn boundary_analysis_in_conjunctions_demo() -> Result<()> {
// Let us consider the more common case of AND & OR conjunctions.
//
// age > 18 AND age <= 25
2 changes: 1 addition & 1 deletion datafusion/catalog/src/information_schema.rs
@@ -810,7 +810,7 @@ impl InformationSchemaColumnsBuilder {
) {
use DataType::*;

// Note: append_value is actually infallable.
// Note: append_value is actually infallible.
self.catalog_names.append_value(catalog_name);
self.schema_names.append_value(schema_name);
self.table_names.append_value(table_name);
4 changes: 2 additions & 2 deletions datafusion/common/src/test_util.rs
@@ -158,7 +158,7 @@ macro_rules! assert_batches_sorted_eq {
/// Is a macro so test error
/// messages are on the same line as the failure;
///
/// Both arguments must be convertable into Strings ([`Into`]<[`String`]>)
/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>)
#[macro_export]
macro_rules! assert_contains {
($ACTUAL: expr, $EXPECTED: expr) => {
@@ -181,7 +181,7 @@ macro_rules! assert_contains {
/// Is a macro so test error
/// messages are on the same line as the failure;
///
/// Both arguments must be convertable into Strings ([`Into`]<[`String`]>)
/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>)
#[macro_export]
macro_rules! assert_not_contains {
($ACTUAL: expr, $UNEXPECTED: expr) => {
4 changes: 2 additions & 2 deletions datafusion/core/benches/spm.rs
@@ -66,7 +66,7 @@ fn generate_spm_for_round_robin_tie_breaker(
};

let rbs = (0..batch_count).map(|_| rb.clone()).collect::<Vec<_>>();
let partitiones = vec![rbs.clone(); partition_count];
let partitions = vec![rbs.clone(); partition_count];

let schema = rb.schema();
let sort = [
@@ -81,7 +81,7 @@
]
.into();

let exec = MemorySourceConfig::try_new_exec(&partitiones, schema, None).unwrap();
let exec = MemorySourceConfig::try_new_exec(&partitions, schema, None).unwrap();
SortPreservingMergeExec::new(sort, exec)
.with_round_robin_repartition(enable_round_robin_repartition)
}
2 changes: 1 addition & 1 deletion datafusion/core/src/bin/print_functions_docs.rs
@@ -260,7 +260,7 @@ fn print_docs(
}
}

/// Trait for accessing name / aliases / documentation for differnet functions
/// Trait for accessing name / aliases / documentation for different functions
trait DocProvider {
fn get_name(&self) -> String;
fn get_aliases(&self) -> Vec<String>;
20 changes: 10 additions & 10 deletions datafusion/core/src/datasource/listing/table.rs
@@ -215,16 +215,16 @@ impl ListingTableConfig {
) -> Result<(String, Option<String>)> {
let mut exts = path.rsplit('.');

let splitted = exts.next().unwrap_or("");
let split = exts.next().unwrap_or("");

let file_compression_type = FileCompressionType::from_str(splitted)
let file_compression_type = FileCompressionType::from_str(split)
.unwrap_or(FileCompressionType::UNCOMPRESSED);

if file_compression_type.is_compressed() {
let splitted2 = exts.next().unwrap_or("");
Ok((splitted2.to_string(), Some(splitted.to_string())))
let split2 = exts.next().unwrap_or("");
Ok((split2.to_string(), Some(split.to_string())))
} else {
Ok((splitted.to_string(), None))
Ok((split.to_string(), None))
}
}

@@ -502,7 +502,7 @@ impl ListingOptions {
///
/// Currently this sets `target_partitions` and `collect_stat`
/// but if more options are added in the future that need to be coordinated
/// they will be synchronized thorugh this method.
/// they will be synchronized through this method.
pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self {
self = self.with_target_partitions(config.target_partitions());
self = self.with_collect_stat(config.collect_statistics());
@@ -1106,8 +1106,8 @@ impl ListingTable {
}

// Expressions can be used for parttion pruning if they can be evaluated using
// only the partiton columns and there are partition columns.
fn can_be_evaluted_for_partition_pruning(
// only the partition columns and there are partition columns.
fn can_be_evaluated_for_partition_pruning(
partition_column_names: &[&str],
expr: &Expr,
) -> bool {
@@ -1156,7 +1156,7 @@ impl TableProvider for ListingTable {
// pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated
let (partition_filters, filters): (Vec<_>, Vec<_>) =
filters.iter().cloned().partition(|filter| {
can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter)
can_be_evaluated_for_partition_pruning(&table_partition_col_names, filter)
});

// We should not limit the number of partitioned files to scan if there are filters and limit
@@ -1245,7 +1245,7 @@ impl TableProvider for ListingTable {
filters
.iter()
.map(|filter| {
if can_be_evaluted_for_partition_pruning(&partition_column_names, filter)
if can_be_evaluated_for_partition_pruning(&partition_column_names, filter)
{
// if filter can be handled by partition pruning, it is exact
return Ok(TableProviderFilterPushDown::Exact);
8 changes: 4 additions & 4 deletions datafusion/core/src/datasource/physical_plan/parquet.rs
@@ -332,7 +332,7 @@ mod tests {
let metric = get_value(&metrics, "pushdown_rows_pruned");
assert_eq!(metric, 3, "Expected all rows to be pruned");

// If we excplicitly allow nulls the rest of the predicate should work
// If we explicitly allow nulls the rest of the predicate should work
let filter = col("c2").is_null().and(col("c1").eq(lit(1_i32)));
let rt = RoundTrip::new()
.with_table_schema(table_schema.clone())
@@ -390,7 +390,7 @@ mod tests {
let metric = get_value(&metrics, "pushdown_rows_pruned");
assert_eq!(metric, 3, "Expected all rows to be pruned");

// If we excplicitly allow nulls the rest of the predicate should work
// If we explicitly allow nulls the rest of the predicate should work
let filter = col("c2").is_null().and(col("c1").eq(lit(1_i32)));
let rt = RoundTrip::new()
.with_table_schema(table_schema.clone())
@@ -452,7 +452,7 @@ mod tests {
let metric = get_value(&metrics, "pushdown_rows_pruned");
assert_eq!(metric, 3, "Expected all rows to be pruned");

// If we excplicitly allow nulls the rest of the predicate should work
// If we explicitly allow nulls the rest of the predicate should work
let filter = col("c2").is_null().and(col("c1").eq(lit(1_i32)));
let rt = RoundTrip::new()
.with_table_schema(table_schema.clone())
@@ -514,7 +514,7 @@ mod tests {
let metric = get_value(&metrics, "pushdown_rows_pruned");
assert_eq!(metric, 3, "Expected all rows to be pruned");

// If we excplicitly allow nulls the rest of the predicate should work
// If we explicitly allow nulls the rest of the predicate should work
let filter = col("c2").is_null().and(col("c3").eq(lit(7_i32)));
let rt = RoundTrip::new()
.with_table_schema(table_schema.clone())
2 changes: 1 addition & 1 deletion datafusion/core/src/physical_planner.rs
@@ -2352,7 +2352,7 @@ impl<'n> TreeNodeVisitor<'n> for OptimizationInvariantChecker<'_> {

fn f_down(&mut self, node: &'n Self::Node) -> Result<TreeNodeRecursion> {
// Checks for the more permissive `InvariantLevel::Always`.
// Plans are not guarenteed to be executable after each physical optimizer run.
// Plans are not guaranteed to be executable after each physical optimizer run.
node.check_invariants(InvariantLevel::Always).map_err(|e|
e.context(format!("Invariant for ExecutionPlan node '{}' failed for PhysicalOptimizer rule '{}'", node.name(), self.rule.name()))
)?;
6 changes: 3 additions & 3 deletions datafusion/core/tests/dataframe/mod.rs
@@ -92,8 +92,8 @@ async fn physical_plan_to_string(df: &DataFrame) -> String {
.await
.expect("Error creating physical plan");

let formated = displayable(physical_plan.as_ref()).indent(true);
formated.to_string()
let formatted = displayable(physical_plan.as_ref()).indent(true);
formatted.to_string()
}

pub fn table_with_constraints() -> Arc<dyn TableProvider> {
@@ -5666,7 +5666,7 @@ async fn test_alias() -> Result<()> {
.await?
.select(vec![col("a"), col("test.b"), lit(1).alias("one")])?
.alias("table_alias")?;
// All ouput column qualifiers are changed to "table_alias"
// All output column qualifiers are changed to "table_alias"
df.schema().columns().iter().for_each(|c| {
assert_eq!(c.relation, Some("table_alias".into()));
});
12 changes: 6 additions & 6 deletions datafusion/core/tests/execution/coop.rs
@@ -429,7 +429,7 @@ async fn interleave_then_filter_all_yields(
let mut infinite_children = vec![];

// Use 32 distinct thresholds (each >0 and <8 192) to force 32 infinite inputs
for thr in 1..32 {
for threshold in 1..32 {
// One infinite exec:
let mut inf = make_lazy_exec_with_range("value", 0..i64::MAX, pretend_infinite);

@@ -439,7 +439,7 @@
let partitioning = Partitioning::Hash(exprs, 1);
inf.try_set_partitioning(partitioning)?;

// Apply a FilterExec: “(value / 8192) % thr == 0”.
// Apply a FilterExec: “(value / 8192) % threshold == 0”.
let filter_expr = binary(
binary(
binary(
Expand All @@ -449,7 +449,7 @@ async fn interleave_then_filter_all_yields(
&inf.schema(),
)?,
Modulo,
lit(thr as i64),
lit(threshold as i64),
&inf.schema(),
)?,
Eq,
@@ -485,7 +485,7 @@ async fn interleave_then_aggregate_yields(
let mut infinite_children = vec![];

// Use 32 distinct thresholds (each >0 and <8 192) to force 32 infinite inputs
for thr in 1..32 {
for threshold in 1..32 {
// One infinite exec:
let mut inf = make_lazy_exec_with_range("value", 0..i64::MAX, pretend_infinite);

@@ -495,7 +495,7 @@
let partitioning = Partitioning::Hash(exprs, 1);
inf.try_set_partitioning(partitioning)?;

// Apply a FilterExec: “(value / 8192) % thr == 0”.
// Apply a FilterExec: “(value / 8192) % threshold == 0”.
let filter_expr = binary(
binary(
binary(
Expand All @@ -505,7 +505,7 @@ async fn interleave_then_aggregate_yields(
&inf.schema(),
)?,
Modulo,
lit(thr as i64),
lit(threshold as i64),
&inf.schema(),
)?,
Eq,
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
@@ -403,7 +403,7 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str
Left Plan:\n{}\n\
Right Plan:\n{}\n\
schema:\n{schema}\n\
Left Ouptut:\n{}\n\
Member

❤️

Left Output:\n{}\n\
Right Output:\n{}\n\
input:\n{}\n\
",
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/pruning.rs
@@ -201,7 +201,7 @@ impl Utf8Test {
}
}

/// all combinations of interesting charactes with lengths ranging from 1 to 4
/// all combinations of interesting characters with lengths ranging from 1 to 4
fn values() -> &'static [String] {
&VALUES
}
2 changes: 1 addition & 1 deletion datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs
@@ -220,7 +220,7 @@ impl SortQueryFuzzer {
.test_gen
.fuzzer_run(init_seed, query_seed, config_seed)
.await?;
println!("\n"); // Seperator between tested runs
println!("\n"); // Separator between tested runs

if expected_results.is_none() {
expected_results = Some(results);
10 changes: 5 additions & 5 deletions datafusion/core/tests/fuzz_cases/topk_filter_pushdown.rs
@@ -219,16 +219,16 @@ struct RunQueryResult {
}

impl RunQueryResult {
fn expected_formated(&self) -> String {
fn expected_formatted(&self) -> String {
format!("{}", pretty_format_batches(&self.expected).unwrap())
}

fn result_formated(&self) -> String {
fn result_formatted(&self) -> String {
format!("{}", pretty_format_batches(&self.result).unwrap())
}

fn is_ok(&self) -> bool {
self.expected_formated() == self.result_formated()
self.expected_formatted() == self.result_formatted()
}
}

@@ -374,8 +374,8 @@ async fn test_fuzz_topk_filter_pushdown() {
for failure in &failures {
println!("Failure:");
println!("Query:\n{}", failure.query);
println!("\nExpected:\n{}", failure.expected_formated());
println!("\nResult:\n{}", failure.result_formated());
println!("\nExpected:\n{}", failure.expected_formatted());
println!("\nResult:\n{}", failure.result_formatted());
println!("\n\n");
}

@@ -31,7 +31,7 @@ static INIT: Once = Once::new();

// ===========================================================================
// Test runners:
// Runners are splitted into multiple tests to run in parallel
// Runners are split into multiple tests to run in parallel
// ===========================================================================

#[test]
2 changes: 1 addition & 1 deletion datafusion/core/tests/memory_limit/mod.rs
@@ -546,7 +546,7 @@ async fn test_external_sort_zero_merge_reservation() {
// Tests for disk limit (`max_temp_directory_size` in `DiskManager`)
// ------------------------------------------------------------------

// Create a new `SessionContext` with speicified disk limit, memory pool limit, and spill compression codec
// Create a new `SessionContext` with specified disk limit, memory pool limit, and spill compression codec
async fn setup_context(
disk_limit: u64,
memory_pool_limit: usize,
@@ -441,7 +441,7 @@ impl TestConfig {

/// Perform a series of runs using the current [`TestConfig`],
/// assert the expected plan result,
/// and return the result plan (for potentional subsequent runs).
/// and return the result plan (for potential subsequent runs).
fn run(
&self,
expected_lines: &[&str],
@@ -2610,7 +2610,7 @@ fn parallelization_two_partitions_into_four() -> Result<()> {
"AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
" RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4",
" AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
// Multiple source files splitted across partitions
// Multiple source files split across partitions
" DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=parquet",
];
test_config.run(
@@ -2625,7 +2625,7 @@
"AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]",
" RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4",
" AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]",
// Multiple source files splitted across partitions
// Multiple source files split across partitions
" DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false",
];
test_config.run(&expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?;