Skip to content

Commit 7c00165

Browse files
committed
dan's homework
1 parent 6d36d7c commit 7c00165

File tree

8 files changed: +108 -3 lines changed

8 files changed: +108 -3 lines changed

python/pyarrow-stubs/pyarrow/_parquet.pyi

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -467,6 +467,7 @@ class ParquetWriter(_Weakrefable):
467467
sorting_columns: tuple[SortingColumn, ...] | None = None,
468468
store_decimal_as_integer: bool = False,
469469
write_time_adjusted_to_utc: bool = False,
470+
max_rows_per_page: int | None = None,
470471
): ...
471472
def close(self) -> None: ...
472473
def write_table(self, table: Table, row_group_size: int | None = None) -> None: ...

python/pyarrow-stubs/pyarrow/_parquet_encryption.pyi

Lines changed: 47 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,11 @@
1616
# under the License.
1717

1818
import datetime as dt
19+
import pathlib
1920

2021
from collections.abc import Callable
2122

23+
from pyarrow._fs import FileSystem
2224
from ._parquet import FileDecryptionProperties, FileEncryptionProperties
2325
from .lib import _Weakrefable
2426

@@ -73,7 +75,7 @@ class KmsConnectionConfig(_Weakrefable):
7375

7476
class KmsClient(_Weakrefable):
7577
def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ...
76-
def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ...
78+
def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> bytes: ...
7779

7880

7981
class CryptoFactory(_Weakrefable):
@@ -93,3 +95,47 @@ class CryptoFactory(_Weakrefable):
9395
) -> FileDecryptionProperties: ...
9496
def remove_cache_entries_for_token(self, access_token: str) -> None: ...
9597
def remove_cache_entries_for_all_tokens(self) -> None: ...
98+
def rotate_master_keys(
99+
self,
100+
kms_connection_config: KmsConnectionConfig,
101+
parquet_file_path: str | pathlib.Path,
102+
filesystem: FileSystem | None = None,
103+
double_wrapping: bool = True,
104+
cache_lifetime_seconds: int | float = 600,
105+
) -> None: ...
106+
107+
108+
class KeyMaterial(_Weakrefable):
109+
@property
110+
def is_footer_key(self) -> bool: ...
111+
@property
112+
def is_double_wrapped(self) -> bool: ...
113+
@property
114+
def master_key_id(self) -> str: ...
115+
@property
116+
def wrapped_dek(self) -> str: ...
117+
@property
118+
def kek_id(self) -> str: ...
119+
@property
120+
def wrapped_kek(self) -> str: ...
121+
@property
122+
def kms_instance_id(self) -> str: ...
123+
@property
124+
def kms_instance_url(self) -> str: ...
125+
@staticmethod
126+
def wrap(key_material: KeyMaterial) -> KeyMaterial: ...
127+
@staticmethod
128+
def parse(key_material_string: str) -> KeyMaterial: ...
129+
130+
131+
132+
class FileSystemKeyMaterialStore(_Weakrefable):
133+
def get_key_material(self, key_id: str) -> KeyMaterial: ...
134+
def get_key_id_set(self) -> list[str]: ...
135+
@classmethod
136+
def for_file(
137+
cls,
138+
parquet_file_path: str | pathlib.Path, /,
139+
filesystem: FileSystem | None = None
140+
) -> FileSystemKeyMaterialStore:
141+
...

python/pyarrow-stubs/pyarrow/compute.pyi

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -318,6 +318,15 @@ def first(
318318
memory_pool: lib.MemoryPool | None = None,
319319
) -> _ScalarT: ...
320320

321+
def last(
322+
array: lib.Array[_ScalarT] | lib.ChunkedArray[_ScalarT],
323+
/,
324+
*,
325+
skip_nulls: bool = True,
326+
min_count: int = 1,
327+
options: ScalarAggregateOptions | None = None,
328+
memory_pool: lib.MemoryPool | None = None,
329+
) -> _ScalarT: ...
321330

322331
def first_last(
323332
array: lib.Array[Any] | lib.ChunkedArray[Any] | list[Any],
@@ -545,6 +554,12 @@ def exp(
545554
) -> (
546555
_FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression): ...
547556

557+
def expm1(
558+
exponent: _FloatArrayT | ArrayOrChunkedArray[NonFloatNumericScalar] | _FloatScalarT
559+
| NonFloatNumericScalar | lib.DoubleScalar | Expression,
560+
/, *, memory_pool: lib.MemoryPool | None = None
561+
) -> (
562+
_FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression): ...
548563

549564
multiply = _clone_signature(add)
550565
multiply_checked = _clone_signature(add)
@@ -741,10 +756,12 @@ logb_checked = _clone_signature(logb)
741756
acos = _clone_signature(ln)
742757
acos_checked = _clone_signature(ln)
743758
acosh = _clone_signature(ln)
759+
acosh_checked = _clone_signature(ln)
744760
asin = _clone_signature(ln)
745761
asin_checked = _clone_signature(ln)
746762
asinh = _clone_signature(ln)
747763
atan = _clone_signature(ln)
764+
atanh_checked = _clone_signature(ln)
748765
atanh = _clone_signature(ln)
749766
cos = _clone_signature(ln)
750767
cos_checked = _clone_signature(ln)
@@ -1171,6 +1188,15 @@ def index_in(
11711188
memory_pool: lib.MemoryPool | None = None,
11721189
) -> lib.Int32Scalar | lib.Int32Array | Expression: ...
11731190

1191+
def index_in_meta_binary(
1192+
values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
1193+
/,
1194+
value_set: lib.Array | lib.ChunkedArray | Expression,
1195+
*,
1196+
skip_nulls: bool = False,
1197+
options: SetLookupOptions | None = None,
1198+
memory_pool: lib.MemoryPool | None = None,
1199+
) -> lib.Int32Scalar | lib.Int32Array | Expression: ...
11741200

11751201
def is_in(
11761202
values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
@@ -1182,6 +1208,15 @@ def is_in(
11821208
memory_pool: lib.MemoryPool | None = None,
11831209
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
11841210

1211+
def is_in_meta_binary(
1212+
values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
1213+
/,
1214+
value_set: lib.Array | lib.ChunkedArray | Expression,
1215+
*,
1216+
skip_nulls: bool = False,
1217+
options: SetLookupOptions | None = None,
1218+
memory_pool: lib.MemoryPool | None = None,
1219+
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
11851220

11861221
match_like = _clone_signature(ends_with)
11871222
match_substring = _clone_signature(ends_with)
@@ -1595,6 +1630,22 @@ def array_filter(
15951630
def drop_null(input: _ArrayT | Expression, /, *, memory_pool: lib.MemoryPool |
15961631
None = None) -> _ArrayT | Expression: ...
15971632

1633+
def inverse_permutation(
1634+
permutation: lib.UInt32Array | lib.UInt64Array | Expression,
1635+
/,
1636+
*,
1637+
memory_pool: lib.MemoryPool | None = None,
1638+
) -> lib.UInt32Array | lib.UInt64Array | Expression: ...
1639+
1640+
def scatter(
1641+
array: _ArrayT | Expression,
1642+
indices: lib.UInt32Array | lib.UInt64Array | Expression,
1643+
/,
1644+
fill_value: _ScalarOrArrayT | Expression | None = None,
1645+
*,
1646+
options: ScatterOptions | None = None,
1647+
memory_pool: lib.MemoryPool | None = None,
1648+
) -> _ArrayT | Expression: ...
15981649

15991650
filter = array_filter
16001651
take = array_take

python/pyarrow-stubs/pyarrow/parquet/core.pyi

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,7 @@ class ParquetWriter:
198198
write_page_checksum: bool = False,
199199
sorting_columns: Sequence[SortingColumn] | None = None,
200200
store_decimal_as_integer: bool = False,
201+
max_rows_per_page: int | None = None,
201202
**options,
202203
) -> None: ...
203204
def __enter__(self) -> Self: ...

python/pyarrow-stubs/pyarrow/parquet/encryption.pyi

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ from pyarrow._parquet_encryption import (
1919
CryptoFactory,
2020
DecryptionConfiguration,
2121
EncryptionConfiguration,
22+
FileSystemKeyMaterialStore,
2223
KmsClient,
2324
KmsConnectionConfig,
2425
)
@@ -27,6 +28,7 @@ __all__ = [
2728
"CryptoFactory",
2829
"DecryptionConfiguration",
2930
"EncryptionConfiguration",
31+
"FileSystemKeyMaterialStore",
3032
"KmsClient",
3133
"KmsConnectionConfig",
3234
]

python/pyarrow/parquet/core.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1908,7 +1908,9 @@ def read_table(source, *, columns=None, use_threads=True,
19081908

19091909
filesystem, path = _resolve_filesystem_and_path(source, filesystem)
19101910
if filesystem is not None:
1911-
if not filesystem.get_file_info(path).is_file:
1911+
file_info = filesystem.get_file_info(path)
1912+
assert isinstance(file_info, FileInfo)
1913+
if not file_info.is_file:
19121914
raise ValueError(
19131915
"the 'source' argument should be "
19141916
"an existing parquet file and not a directory "

python/pyarrow/tests/parquet/test_encryption.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -553,7 +553,7 @@ def test_encrypted_parquet_write_read_external(tempdir, data_table,
553553
result_table = read_encrypted_parquet(
554554
path, decryption_config, kms_connection_config, crypto_factory,
555555
internal_key_material=False)
556-
store = pa._parquet_encryption.FileSystemKeyMaterialStore.for_file(path)
556+
store = pe.FileSystemKeyMaterialStore.for_file(path)
557557

558558
assert len(key_ids := store.get_key_id_set()) == (
559559
len(external_encryption_config.column_keys[COL_KEY_NAME]) + 1)

python/pyarrow/tests/test_flight.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -266,6 +266,7 @@ class EchoTableStreamFlightServer(EchoFlightServer):
266266
"""An echo server that streams the whole table."""
267267

268268
def do_get(self, context, ticket):
269+
assert self.last_message is not None
269270
return flight.GeneratorStream(
270271
self.last_message.schema,
271272
[self.last_message])
@@ -283,6 +284,7 @@ class EchoRecordBatchReaderStreamFlightServer(EchoFlightServer):
283284
"""An echo server that streams the whole table as a RecordBatchReader."""
284285

285286
def do_get(self, context, ticket):
287+
assert self.last_message is not None
286288
return flight.GeneratorStream(
287289
self.last_message.schema,
288290
[self.last_message.to_reader()])

0 commit comments

Comments
 (0)