
Commit f134d4a

fix some annotations
1 parent 6b6fde3 commit f134d4a

11 files changed (+45, -30 lines)


python/pyarrow-stubs/pyarrow/_dataset_parquet.pyi

Lines changed: 13 additions & 2 deletions
@@ -46,7 +46,19 @@ class ParquetFileFormat(FileFormat):
         self,
         read_options: ParquetReadOptions | None = None,
         default_fragment_scan_options: ParquetFragmentScanOptions | None = None,
-        **kwargs,
+        *,
+        pre_buffer: bool = True,
+        coerce_int96_timestamp_unit: str | None = None,
+        thrift_string_size_limit: int | None = None,
+        thrift_container_size_limit: int | None = None,
+        page_checksum_verification: bool = False,
+        arrow_extensions_enabled: bool = True,
+        binary_type: DataType | None = None,
+        list_type: type[ListType | LargeListType] | None = None,
+        use_buffered_stream: bool = False,
+        buffer_size: int = 8192,
+        dictionary_columns: list[str] | set[str] | None = None,
+        decryption_properties: FileDecryptionProperties | None = None,
     ) -> None: ...
     @property
     def read_options(self) -> ParquetReadOptions: ...
@@ -60,7 +72,6 @@ class ParquetFileFormat(FileFormat):
     def make_fragment(
         self,
         file: StrPath | IO | Buffer | BufferReader,
-
         filesystem: SupportedFileSystem | None = None,
         partition_expression: Expression | None = None,
         row_groups: Iterable[int] | None = None,
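With the `**kwargs` escape hatch replaced by explicit keyword-only parameters, a type checker can now validate each option at the call site. A minimal usage sketch (the path and column name are placeholders; the option names mirror the ParquetReadOptions/ParquetFragmentScanOptions fields spelled out in the stub above):

import pyarrow.dataset as ds

# Each keyword is now individually typed in the stub instead of being
# swallowed by **kwargs, so typos and wrong value types are caught statically.
fmt = ds.ParquetFileFormat(
    pre_buffer=True,               # coalesce column-chunk reads
    buffer_size=8192,              # stream buffer size in bytes (stub default)
    dictionary_columns=["col_a"],  # placeholder column name
)
dataset = ds.dataset("data/", format=fmt)  # placeholder path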

python/pyarrow/tests/parquet/test_dataset.py

Lines changed: 2 additions & 2 deletions
@@ -971,7 +971,7 @@ def _test_write_to_dataset_with_partitions(base_path,
     input_df_cols = input_df.columns.tolist()
     assert partition_by == input_df_cols[-1 * len(partition_by):]

-    input_df = input_df[cols]
+    input_df = input_df[cols]  # type: ignore[assignment]
     # Partitioned columns become 'categorical' dtypes
     for col in partition_by:
         output_df[col] = output_df[col].astype('category')
@@ -1027,7 +1027,7 @@ def _test_write_to_dataset_no_partitions(base_path,
     ).read()
     input_df = input_table.to_pandas()
     input_df = input_df.drop_duplicates()
-    input_df = input_df[cols]
+    input_df = input_df[cols]  # type: ignore[assignment]
     tm.assert_frame_equal(output_df, input_df)
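The ignores added here are scoped to a single error code, which silences only the one diagnostic raised for that line. A standalone sketch of the pattern (toy frame and columns, assuming pandas-stubs-style typing flags the re-assignment):

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "part": ["x", "y"]})
cols = ["a", "part"]
# Scoped to [assignment]: any other error on this line would still be
# reported, unlike a bare "# type: ignore".
df = df[cols]  # type: ignore[assignment]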

python/pyarrow/tests/parquet/test_pandas.py

Lines changed: 3 additions & 3 deletions
@@ -571,15 +571,15 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir):
         table, str(tempdir / "case1"), partition_cols=['part'],
     )
     result = pq.read_table(str(tempdir / "case1")).to_pandas()
-    tm.assert_frame_equal(result[["col"]], cast(pd.DataFrame, df[["col"]]))
+    tm.assert_frame_equal(cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))

     pq.write_to_dataset(table, str(tempdir / "case2"))
     result = pq.read_table(str(tempdir / "case2")).to_pandas()
-    tm.assert_frame_equal(result[["col"]], cast(pd.DataFrame, df[["col"]]))
+    tm.assert_frame_equal(cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))

     pq.write_table(table, str(tempdir / "data.parquet"))
     result = pq.read_table(str(tempdir / "data.parquet")).to_pandas()
-    tm.assert_frame_equal(result[["col"]], cast(pd.DataFrame, df[["col"]]))
+    tm.assert_frame_equal(cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))


 @pytest.mark.pandas

python/pyarrow/tests/parquet/test_parquet_file.py

Lines changed: 1 addition & 1 deletion
@@ -262,7 +262,7 @@ def get_all_batches(f):

     tm.assert_frame_equal(
         batches[batch_no].to_pandas().reset_index(drop=True),
-        file_.read_row_groups([i]).to_pandas().iloc[900:].reset_index(
+        file_.read_row_groups([i]).to_pandas().iloc[900:].reset_index(  # type: ignore[arg-type]
             drop=True
         )
     )

python/pyarrow/tests/test_acero.py

Lines changed: 2 additions & 2 deletions
@@ -274,13 +274,13 @@ def test_order_by():
     expected = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
     assert result.equals(expected)

-    ord_opts = OrderByNodeOptions([(field("b"), "descending")])
+    ord_opts = OrderByNodeOptions([(field("b"), "descending")])  # type: ignore[arg-type]
     decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     result = decl.to_table()
     expected = pa.table({"a": [2, 4, 1, 3], "b": [3, 2, 1, None]})
     assert result.equals(expected)

-    ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")
+    ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")  # type: ignore[arg-type]
     decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     result = decl.to_table()
     expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
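These two ignores suggest the stub's sort keys accept plain field names but not `Expression` objects or integer column indices, even though both work at runtime. If that assumption holds, the ignore-free spelling names the field directly; a sketch:

import pyarrow as pa
from pyarrow.acero import Declaration, OrderByNodeOptions, TableSourceNodeOptions

table = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
table_source = Declaration("table_source", TableSourceNodeOptions(table))

# A string field name should satisfy both the runtime and the stub.
ord_opts = OrderByNodeOptions([("b", "descending")])
result = Declaration.from_sequence(
    [table_source, Declaration("order_by", ord_opts)]
).to_table()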

python/pyarrow/tests/test_array.py

Lines changed: 9 additions & 9 deletions
@@ -1186,24 +1186,24 @@ def test_map_from_arrays():
     keys = pa.array(pykeys, type='binary')
     items = pa.array(pyitems, type='i4')

-    result = pa.MapArray.from_arrays(offsets, keys, items)
+    result = pa.MapArray.from_arrays(offsets, keys, items)  # type: ignore[arg-type]
     expected = pa.array(pyentries, type=pa.map_(pa.binary(), pa.int32()))

     assert result.equals(expected)

     # pass in the type explicitly
-    result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+    result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(  # type: ignore[arg-type]
         keys.type,
         items.type
     ))
     assert result.equals(expected)

     # pass in invalid types
     with pytest.raises(pa.ArrowTypeError, match='Expected map type, got string'):
-        pa.MapArray.from_arrays(offsets, keys, items, pa.string())
+        pa.MapArray.from_arrays(offsets, keys, items, pa.string())  # type: ignore[arg-type]

     with pytest.raises(pa.ArrowTypeError, match='Mismatching map items type'):
-        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(  # type: ignore[arg-type]
             keys.type,
             # Larger than the original i4
             pa.int64()
@@ -1241,7 +1241,7 @@ def test_map_from_arrays():
     # error if null bitmap and offsets with nulls passed
     msg1 = 'Ambiguous to specify both validity map and offsets with nulls'
     with pytest.raises(pa.ArrowInvalid, match=msg1):
-        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(  # type: ignore[arg-type]
             keys.type,
             items.type),
             mask=pa.array([False, True, False], type=pa.bool_())
@@ -2649,7 +2649,7 @@ def test_interval_array_from_relativedelta():
     assert arr.type == pa.month_day_nano_interval()
     expected_list = [
         None,
-        pa.MonthDayNano([13, 8,
+        pa.MonthDayNano([13, 8,  # type: ignore[arg-type]
                          (datetime.timedelta(seconds=1, microseconds=1,
                                              minutes=1, hours=1) //
                           datetime.timedelta(microseconds=1)) * 1000])]
@@ -2682,7 +2682,7 @@ def test_interval_array_from_tuple():
     assert arr.type == pa.month_day_nano_interval()
     expected_list = [
         None,
-        pa.MonthDayNano([1, 2, -3])]
+        pa.MonthDayNano([1, 2, -3])]  # type: ignore[arg-type]
     expected = pa.array(expected_list)
     assert arr.equals(expected)
     assert arr.to_pylist() == expected_list
@@ -2703,8 +2703,8 @@ def test_interval_array_from_dateoffset():
     assert arr.type == pa.month_day_nano_interval()
     expected_list = [
         None,
-        pa.MonthDayNano([13, 8, 3661000001001]),
-        pa.MonthDayNano([0, 0, 0])]
+        pa.MonthDayNano([13, 8, 3661000001001]),  # type: ignore[arg-type]
+        pa.MonthDayNano([0, 0, 0])]  # type: ignore[arg-type]
     expected = pa.array(expected_list)
     assert arr.equals(expected)
     expected_from_pandas = [

python/pyarrow/tests/test_compute.py

Lines changed: 1 addition & 1 deletion
@@ -1797,7 +1797,7 @@ def test_round_to_multiple():
     for multiple in [0, -2, pa.scalar(-10.4)]:
         with pytest.raises(pa.ArrowInvalid,
                            match="Rounding multiple must be positive"):
-            pc.round_to_multiple(values, multiple=multiple)
+            pc.round_to_multiple(values, multiple=multiple)  # type: ignore[arg-type]

     for multiple in [object, 99999999999999999999999]:
         with pytest.raises(TypeError, match="is not a valid multiple type"):

python/pyarrow/tests/test_fs.py

Lines changed: 4 additions & 4 deletions
@@ -1440,19 +1440,19 @@ def test_s3_proxy_options(monkeypatch, pickle_module):
         S3FileSystem(proxy_options=('http', 'localhost', 9090))
     # Missing scheme
     with pytest.raises(KeyError):
-        S3FileSystem(proxy_options={'host': 'localhost', 'port': 9090})
+        S3FileSystem(proxy_options={'host': 'localhost', 'port': 9090})  # type: ignore[missing-typed-dict-key]
     # Missing host
     with pytest.raises(KeyError):
-        S3FileSystem(proxy_options={'scheme': 'https', 'port': 9090})
+        S3FileSystem(proxy_options={'scheme': 'https', 'port': 9090})  # type: ignore[missing-typed-dict-key]
     # Missing port
     with pytest.raises(KeyError):
-        S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost'})
+        S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost'})  # type: ignore[missing-typed-dict-key]
     # Invalid proxy URI (invalid scheme httpsB)
     with pytest.raises(pa.ArrowInvalid):
         S3FileSystem(proxy_options='httpsB://localhost:9000')
     # Invalid proxy_options dict (invalid scheme httpA)
     with pytest.raises(pa.ArrowInvalid):
-        S3FileSystem(proxy_options={'scheme': 'httpA', 'host': 'localhost',
+        S3FileSystem(proxy_options={'scheme': 'httpA', 'host': 'localhost',  # type: ignore[typeddict-item]
                                     'port': 8999})
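The two ignore codes above correspond to the two ways a dict literal can fail against a TypedDict-typed parameter: a required key is absent, or a value does not match the declared type for its key (here presumably a Literal of valid schemes). An illustrative stand-in (ProxyOptions is hypothetical, not necessarily the stub's actual name):

from typing import Literal, TypedDict

class ProxyOptions(TypedDict):  # hypothetical stand-in for the stub's type
    scheme: Literal['http', 'https']
    host: str
    port: int

def connect(opts: ProxyOptions) -> None: ...

connect({'host': 'localhost', 'port': 9090})   # checker error: missing key 'scheme'
connect({'scheme': 'httpA', 'host': 'localhost', 'port': 9090})  # checker error: bad value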

python/pyarrow/tests/test_ipc.py

Lines changed: 2 additions & 2 deletions
@@ -393,7 +393,7 @@ def test_stream_write_table_batches(stream_fixture):
         'one': np.random.randn(20),
     })

-    b1 = pa.RecordBatch.from_pandas(df[:10], preserve_index=False)
+    b1 = pa.RecordBatch.from_pandas(df[:10], preserve_index=False)  # type: ignore[arg-type]
     b2 = pa.RecordBatch.from_pandas(df, preserve_index=False)

     table = pa.Table.from_batches([b1, b2, b1])
@@ -976,7 +976,7 @@ def test_batches_with_custom_metadata_roundtrip(ipc_type):

     with file_factory(sink, batch.schema) as writer:
         for i in range(batch_count):
-            writer.write_batch(batch, custom_metadata={"batch_id": str(i)})
+            writer.write_batch(batch, custom_metadata={"batch_id": str(i)})  # type: ignore[arg-type]
         # write a batch without custom metadata
         writer.write_batch(batch)

python/pyarrow/tests/test_scalars.py

Lines changed: 7 additions & 3 deletions
@@ -424,7 +424,9 @@ def test_timestamp():
         expected = pd.Timestamp('2000-01-01 12:34:56')

         assert arrow_arr[0].as_py() == expected
-        assert cast(pa.TimestampScalar, arrow_arr[0]).value * 1000**i == expected.value
+        value = cast(pa.TimestampScalar, arrow_arr[0]).value
+        assert value is not None
+        assert value * 1000**i == expected.value

         tz = 'America/New_York'
         arrow_type = pa.timestamp(unit, tz=tz)
@@ -436,7 +438,9 @@ def test_timestamp():
                     .tz_convert(tz))

         assert arrow_arr[0].as_py() == expected
-        assert cast(pa.TimestampScalar, arrow_arr[0]).value * 1000**i == expected.value
+        value = cast(pa.TimestampScalar, arrow_arr[0]).value
+        assert value is not None
+        assert value * 1000**i == expected.value


 @pytest.mark.nopandas
@@ -531,7 +535,7 @@ def test_duration_nanos_nopandas():


 def test_month_day_nano_interval():
-    triple = pa.MonthDayNano([-3600, 1800, -50])
+    triple = pa.MonthDayNano([-3600, 1800, -50])  # type: ignore[invalid-argument-type]
     arr = pa.array([triple])
     assert isinstance(arr[0].as_py(), pa.MonthDayNano)
     assert arr[0].as_py() == triple
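Splitting the one-line assert into an explicit narrowing assert is the standard way to do arithmetic on an attribute the stubs type as optional. The pattern in isolation (maybe_value is a stand-in for TimestampScalar.value, which the stubs evidently type as int | None):

def maybe_value() -> int | None:  # stand-in for TimestampScalar.value
    return 42

value = maybe_value()
assert value is not None      # narrows int | None to int for the checker
assert value * 1000 == 42000  # arithmetic now type-checks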
