
Commit f134d4a

fix some annotations
1 parent 6b6fde3 commit f134d4a

11 files changed (+45, -30 lines)


python/pyarrow-stubs/pyarrow/_dataset_parquet.pyi

Lines changed: 13 additions & 2 deletions
@@ -46,7 +46,19 @@ class ParquetFileFormat(FileFormat):
         self,
         read_options: ParquetReadOptions | None = None,
         default_fragment_scan_options: ParquetFragmentScanOptions | None = None,
-        **kwargs,
+        *,
+        pre_buffer: bool = True,
+        coerce_int96_timestamp_unit: str | None = None,
+        thrift_string_size_limit: int | None = None,
+        thrift_container_size_limit: int | None = None,
+        page_checksum_verification: bool = False,
+        arrow_extensions_enabled: bool = True,
+        binary_type: DataType | None = None,
+        list_type: type[ListType | LargeListType] | None = None,
+        use_buffered_stream: bool = False,
+        buffer_size: int = 8192,
+        dictionary_columns: list[str] | set[str] | None = None,
+        decryption_properties: FileDecryptionProperties | None = None,
     ) -> None: ...
     @property
     def read_options(self) -> ParquetReadOptions: ...
@@ -60,7 +72,6 @@ class ParquetFileFormat(FileFormat):
     def make_fragment(
         self,
         file: StrPath | IO | Buffer | BufferReader,
-
         filesystem: SupportedFileSystem | None = None,
         partition_expression: Expression | None = None,
         row_groups: Iterable[int] | None = None,
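With the `**kwargs` escape hatch replaced by explicit keyword-only parameters, a type checker can now validate each option at the call site. A minimal usage sketch (the path and column name are placeholders; the option names mirror the ParquetReadOptions/ParquetFragmentScanOptions fields spelled out in the stub above):

import pyarrow.dataset as ds

# Each keyword is now individually typed in the stub instead of being
# swallowed by **kwargs, so typos and wrong value types are caught statically.
fmt = ds.ParquetFileFormat(
    pre_buffer=True,               # coalesce column-chunk reads
    buffer_size=8192,              # stream buffer size in bytes (stub default)
    dictionary_columns=["col_a"],  # placeholder column name
)
dataset = ds.dataset("data/", format=fmt)  # placeholder path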

python/pyarrow/tests/parquet/test_dataset.py

Lines changed: 2 additions & 2 deletions
@@ -971,7 +971,7 @@ def _test_write_to_dataset_with_partitions(base_path,
     input_df_cols = input_df.columns.tolist()
     assert partition_by == input_df_cols[-1 * len(partition_by):]

-    input_df = input_df[cols]
+    input_df = input_df[cols]  # type: ignore[assignment]
     # Partitioned columns become 'categorical' dtypes
     for col in partition_by:
         output_df[col] = output_df[col].astype('category')
@@ -1027,7 +1027,7 @@ def _test_write_to_dataset_no_partitions(base_path,
     ).read()
     input_df = input_table.to_pandas()
     input_df = input_df.drop_duplicates()
-    input_df = input_df[cols]
+    input_df = input_df[cols]  # type: ignore[assignment]
     tm.assert_frame_equal(output_df, input_df)
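The ignores added here are scoped to a single error code, which silences only the one diagnostic raised for that line. A standalone sketch of the pattern (toy frame and columns, assuming pandas-stubs-style typing flags the re-assignment):

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "part": ["x", "y"]})
cols = ["a", "part"]
# Scoped to [assignment]: any other error on this line would still be
# reported, unlike a bare "# type: ignore".
df = df[cols]  # type: ignore[assignment]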

python/pyarrow/tests/parquet/test_pandas.py

Lines changed: 3 additions & 3 deletions
@@ -571,15 +571,15 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir):
         table, str(tempdir / "case1"), partition_cols=['part'],
     )
     result = pq.read_table(str(tempdir / "case1")).to_pandas()
-    tm.assert_frame_equal(result[["col"]], cast(pd.DataFrame, df[["col"]]))
+    tm.assert_frame_equal(cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))

     pq.write_to_dataset(table, str(tempdir / "case2"))
     result = pq.read_table(str(tempdir / "case2")).to_pandas()
-    tm.assert_frame_equal(result[["col"]], cast(pd.DataFrame, df[["col"]]))
+    tm.assert_frame_equal(cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))

     pq.write_table(table, str(tempdir / "data.parquet"))
     result = pq.read_table(str(tempdir / "data.parquet")).to_pandas()
-    tm.assert_frame_equal(result[["col"]], cast(pd.DataFrame, df[["col"]]))
+    tm.assert_frame_equal(cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))


 @pytest.mark.pandas

python/pyarrow/tests/parquet/test_parquet_file.py

Lines changed: 1 addition & 1 deletion
@@ -262,7 +262,7 @@ def get_all_batches(f):

     tm.assert_frame_equal(
         batches[batch_no].to_pandas().reset_index(drop=True),
-        file_.read_row_groups([i]).to_pandas().iloc[900:].reset_index(
+        file_.read_row_groups([i]).to_pandas().iloc[900:].reset_index(  # type: ignore[arg-type]
             drop=True
         )
     )

python/pyarrow/tests/test_acero.py

Lines changed: 2 additions & 2 deletions
@@ -274,13 +274,13 @@ def test_order_by():
     expected = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
     assert result.equals(expected)

-    ord_opts = OrderByNodeOptions([(field("b"), "descending")])
+    ord_opts = OrderByNodeOptions([(field("b"), "descending")])  # type: ignore[arg-type]
     decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     result = decl.to_table()
     expected = pa.table({"a": [2, 4, 1, 3], "b": [3, 2, 1, None]})
     assert result.equals(expected)

-    ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")
+    ord_opts = OrderByNodeOptions([(1, "descending")], null_placement="at_start")  # type: ignore[arg-type]
     decl = Declaration.from_sequence([table_source, Declaration("order_by", ord_opts)])
     result = decl.to_table()
     expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
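These two ignores suggest the stub's sort keys accept plain field names but not `Expression` objects or integer column indices, even though both work at runtime. If that assumption holds, the ignore-free spelling names the field directly; a sketch:

import pyarrow as pa
from pyarrow.acero import Declaration, OrderByNodeOptions, TableSourceNodeOptions

table = pa.table({"a": [1, 4, 2, 3], "b": [1, 2, 3, None]})
table_source = Declaration("table_source", TableSourceNodeOptions(table))

# A string field name should satisfy both the runtime and the stub.
ord_opts = OrderByNodeOptions([("b", "descending")])
result = Declaration.from_sequence(
    [table_source, Declaration("order_by", ord_opts)]
).to_table()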

python/pyarrow/tests/test_array.py

Lines changed: 9 additions & 9 deletions
@@ -1186,24 +1186,24 @@ def test_map_from_arrays():
     keys = pa.array(pykeys, type='binary')
     items = pa.array(pyitems, type='i4')

-    result = pa.MapArray.from_arrays(offsets, keys, items)
+    result = pa.MapArray.from_arrays(offsets, keys, items)  # type: ignore[arg-type]
     expected = pa.array(pyentries, type=pa.map_(pa.binary(), pa.int32()))

     assert result.equals(expected)

     # pass in the type explicitly
-    result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+    result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(  # type: ignore[arg-type]
         keys.type,
         items.type
     ))
     assert result.equals(expected)

     # pass in invalid types
     with pytest.raises(pa.ArrowTypeError, match='Expected map type, got string'):
-        pa.MapArray.from_arrays(offsets, keys, items, pa.string())
+        pa.MapArray.from_arrays(offsets, keys, items, pa.string())  # type: ignore[arg-type]

     with pytest.raises(pa.ArrowTypeError, match='Mismatching map items type'):
-        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(  # type: ignore[arg-type]
             keys.type,
             # Larger than the original i4
             pa.int64()
@@ -1241,7 +1241,7 @@ def test_map_from_arrays():
     # error if null bitmap and offsets with nulls passed
     msg1 = 'Ambiguous to specify both validity map and offsets with nulls'
     with pytest.raises(pa.ArrowInvalid, match=msg1):
-        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
+        pa.MapArray.from_arrays(offsets, keys, items, pa.map_(  # type: ignore[arg-type]
             keys.type,
             items.type),
             mask=pa.array([False, True, False], type=pa.bool_())
@@ -2649,7 +2649,7 @@ def test_interval_array_from_relativedelta():
     assert arr.type == pa.month_day_nano_interval()
     expected_list = [
         None,
-        pa.MonthDayNano([13, 8,
+        pa.MonthDayNano([13, 8,  # type: ignore[arg-type]
                          (datetime.timedelta(seconds=1, microseconds=1,
                                              minutes=1, hours=1) //
                           datetime.timedelta(microseconds=1)) * 1000])]
@@ -2682,7 +2682,7 @@ def test_interval_array_from_tuple():
     assert arr.type == pa.month_day_nano_interval()
     expected_list = [
         None,
-        pa.MonthDayNano([1, 2, -3])]
+        pa.MonthDayNano([1, 2, -3])]  # type: ignore[arg-type]
     expected = pa.array(expected_list)
     assert arr.equals(expected)
     assert arr.to_pylist() == expected_list
@@ -2703,8 +2703,8 @@ def test_interval_array_from_dateoffset():
     assert arr.type == pa.month_day_nano_interval()
     expected_list = [
         None,
-        pa.MonthDayNano([13, 8, 3661000001001]),
-        pa.MonthDayNano([0, 0, 0])]
+        pa.MonthDayNano([13, 8, 3661000001001]),  # type: ignore[arg-type]
+        pa.MonthDayNano([0, 0, 0])]  # type: ignore[arg-type]
     expected = pa.array(expected_list)
     assert arr.equals(expected)
     expected_from_pandas = [

python/pyarrow/tests/test_compute.py

Lines changed: 1 addition & 1 deletion
@@ -1797,7 +1797,7 @@ def test_round_to_multiple():
     for multiple in [0, -2, pa.scalar(-10.4)]:
         with pytest.raises(pa.ArrowInvalid,
                            match="Rounding multiple must be positive"):
-            pc.round_to_multiple(values, multiple=multiple)
+            pc.round_to_multiple(values, multiple=multiple)  # type: ignore[arg-type]

     for multiple in [object, 99999999999999999999999]:
         with pytest.raises(TypeError, match="is not a valid multiple type"):

python/pyarrow/tests/test_fs.py

Lines changed: 4 additions & 4 deletions
@@ -1440,19 +1440,19 @@ def test_s3_proxy_options(monkeypatch, pickle_module):
         S3FileSystem(proxy_options=('http', 'localhost', 9090))
     # Missing scheme
     with pytest.raises(KeyError):
-        S3FileSystem(proxy_options={'host': 'localhost', 'port': 9090})
+        S3FileSystem(proxy_options={'host': 'localhost', 'port': 9090})  # type: ignore[missing-typed-dict-key]
     # Missing host
     with pytest.raises(KeyError):
-        S3FileSystem(proxy_options={'scheme': 'https', 'port': 9090})
+        S3FileSystem(proxy_options={'scheme': 'https', 'port': 9090})  # type: ignore[missing-typed-dict-key]
     # Missing port
     with pytest.raises(KeyError):
-        S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost'})
+        S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost'})  # type: ignore[missing-typed-dict-key]
     # Invalid proxy URI (invalid scheme httpsB)
     with pytest.raises(pa.ArrowInvalid):
         S3FileSystem(proxy_options='httpsB://localhost:9000')
     # Invalid proxy_options dict (invalid scheme httpA)
     with pytest.raises(pa.ArrowInvalid):
-        S3FileSystem(proxy_options={'scheme': 'httpA', 'host': 'localhost',
+        S3FileSystem(proxy_options={'scheme': 'httpA', 'host': 'localhost',  # type: ignore[typeddict-item]
                                     'port': 8999})
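The two ignore codes above correspond to the two ways a dict literal can fail against a TypedDict-typed parameter: a required key is absent, or a value does not match the declared type for its key (here presumably a Literal of valid schemes). An illustrative stand-in (ProxyOptions is hypothetical, not necessarily the stub's actual name):

from typing import Literal, TypedDict

class ProxyOptions(TypedDict):  # hypothetical stand-in for the stub's type
    scheme: Literal['http', 'https']
    host: str
    port: int

def connect(opts: ProxyOptions) -> None: ...

connect({'host': 'localhost', 'port': 9090})   # checker error: missing key 'scheme'
connect({'scheme': 'httpA', 'host': 'localhost', 'port': 9090})  # checker error: bad value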

python/pyarrow/tests/test_ipc.py

Lines changed: 2 additions & 2 deletions
@@ -393,7 +393,7 @@ def test_stream_write_table_batches(stream_fixture):
         'one': np.random.randn(20),
     })

-    b1 = pa.RecordBatch.from_pandas(df[:10], preserve_index=False)
+    b1 = pa.RecordBatch.from_pandas(df[:10], preserve_index=False)  # type: ignore[arg-type]
     b2 = pa.RecordBatch.from_pandas(df, preserve_index=False)

     table = pa.Table.from_batches([b1, b2, b1])
@@ -976,7 +976,7 @@ def test_batches_with_custom_metadata_roundtrip(ipc_type):

     with file_factory(sink, batch.schema) as writer:
         for i in range(batch_count):
-            writer.write_batch(batch, custom_metadata={"batch_id": str(i)})
+            writer.write_batch(batch, custom_metadata={"batch_id": str(i)})  # type: ignore[arg-type]
         # write a batch without custom metadata
         writer.write_batch(batch)

python/pyarrow/tests/test_scalars.py

Lines changed: 7 additions & 3 deletions
@@ -424,7 +424,9 @@ def test_timestamp():
         expected = pd.Timestamp('2000-01-01 12:34:56')

         assert arrow_arr[0].as_py() == expected
-        assert cast(pa.TimestampScalar, arrow_arr[0]).value * 1000**i == expected.value
+        value = cast(pa.TimestampScalar, arrow_arr[0]).value
+        assert value is not None
+        assert value * 1000**i == expected.value

         tz = 'America/New_York'
         arrow_type = pa.timestamp(unit, tz=tz)
@@ -436,7 +438,9 @@ def test_timestamp():
                     .tz_convert(tz))

         assert arrow_arr[0].as_py() == expected
-        assert cast(pa.TimestampScalar, arrow_arr[0]).value * 1000**i == expected.value
+        value = cast(pa.TimestampScalar, arrow_arr[0]).value
+        assert value is not None
+        assert value * 1000**i == expected.value


 @pytest.mark.nopandas
@@ -531,7 +535,7 @@ def test_duration_nanos_nopandas():


 def test_month_day_nano_interval():
-    triple = pa.MonthDayNano([-3600, 1800, -50])
+    triple = pa.MonthDayNano([-3600, 1800, -50])  # type: ignore[invalid-argument-type]
     arr = pa.array([triple])
     assert isinstance(arr[0].as_py(), pa.MonthDayNano)
     assert arr[0].as_py() == triple
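Splitting the one-line assert into an explicit narrowing assert is the standard way to do arithmetic on an attribute the stubs type as optional. The pattern in isolation (maybe_value is a stand-in for TimestampScalar.value, which the stubs evidently type as int | None):

def maybe_value() -> int | None:  # stand-in for TimestampScalar.value
    return 42

value = maybe_value()
assert value is not None      # narrows int | None to int for the checker
assert value * 1000 == 42000  # arithmetic now type-checks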
