Skip to content

Commit 83bc664

Browse files
committed
dan's homework
1 parent 6d36d7c commit 83bc664

File tree

9 files changed

+71
-4
lines changed

9 files changed

+71
-4
lines changed

python/pyarrow-stubs/pyarrow/_parquet.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ class ParquetWriter(_Weakrefable):
467467
sorting_columns: tuple[SortingColumn, ...] | None = None,
468468
store_decimal_as_integer: bool = False,
469469
write_time_adjusted_to_utc: bool = False,
470+
max_rows_per_page: int | None = None,
470471
): ...
471472
def close(self) -> None: ...
472473
def write_table(self, table: Table, row_group_size: int | None = None) -> None: ...

python/pyarrow-stubs/pyarrow/_parquet_encryption.pyi

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616
# under the License.
1717

1818
import datetime as dt
19+
import pathlib
1920

2021
from collections.abc import Callable
2122

23+
from pyarrow._fs import FileSystem
2224
from ._parquet import FileDecryptionProperties, FileEncryptionProperties
2325
from .lib import _Weakrefable
2426

@@ -73,7 +75,7 @@ class KmsConnectionConfig(_Weakrefable):
7375

7476
class KmsClient(_Weakrefable):
7577
def wrap_key(self, key_bytes: bytes, master_key_identifier: str) -> str: ...
76-
def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> str: ...
78+
def unwrap_key(self, wrapped_key: str, master_key_identifier: str) -> bytes: ...
7779

7880

7981
class CryptoFactory(_Weakrefable):
@@ -93,3 +95,47 @@ class CryptoFactory(_Weakrefable):
9395
) -> FileDecryptionProperties: ...
9496
def remove_cache_entries_for_token(self, access_token: str) -> None: ...
9597
def remove_cache_entries_for_all_tokens(self) -> None: ...
98+
def rotate_master_keys(
    self,
    kms_connection_config: KmsConnectionConfig,
    parquet_file_path: str | pathlib.Path,
    filesystem: FileSystem | None = None,
    double_wrapping: bool = True,
    cache_lifetime_seconds: float = 600,
) -> None:
    """Type stub for ``CryptoFactory.rotate_master_keys``.

    Parameters
    ----------
    kms_connection_config : KmsConnectionConfig
        Connection settings for the KMS.
    parquet_file_path : str or pathlib.Path
        Path of the Parquet file to operate on.
    filesystem : FileSystem, optional
        Filesystem used to resolve ``parquet_file_path``.
        NOTE(review): presumably the local filesystem is used when None;
        confirm against the implementation.
    double_wrapping : bool, default True
    cache_lifetime_seconds : float, default 600
        Annotated as plain ``float``: type checkers already accept ``int``
        wherever ``float`` is expected, so ``int | float`` was redundant.

    NOTE(review): the name suggests this re-wraps the file's key material
    under rotated master keys; the stub alone does not show that, so verify
    against the Parquet encryption documentation.
    """
106+
107+
108+
class KeyMaterial(_Weakrefable):
    """Type stub for ``KeyMaterial``.

    Exposes eight read-only properties (two ``bool`` flags and six ``str``
    fields) and two static methods, ``wrap`` and ``parse``, that each
    return a ``KeyMaterial``.
    """

    # Boolean flags.
    @property
    def is_footer_key(self) -> bool: ...
    @property
    def is_double_wrapped(self) -> bool: ...

    # String-valued fields.
    @property
    def master_key_id(self) -> str: ...
    @property
    def wrapped_dek(self) -> str: ...
    @property
    def kek_id(self) -> str: ...
    @property
    def wrapped_kek(self) -> str: ...
    @property
    def kms_instance_id(self) -> str: ...
    @property
    def kms_instance_url(self) -> str: ...

    # Static factories.
    @staticmethod
    def wrap(key_material: KeyMaterial) -> KeyMaterial: ...
    @staticmethod
    def parse(key_material_string: str) -> KeyMaterial: ...
129+
130+
131+
132+
class FileSystemKeyMaterialStore(_Weakrefable):
    """Type stub for ``FileSystemKeyMaterialStore``.

    Instances are obtained through the ``for_file`` classmethod; key material
    is then looked up by key id.
    """

    def get_key_material(self, key_id: str) -> KeyMaterial: ...
    def get_key_id_set(self) -> list[str]: ...
    @classmethod
    def for_file(
        cls,
        parquet_file_path: str | pathlib.Path,
        /,
        filesystem: FileSystem | None = None,
    ) -> FileSystemKeyMaterialStore: ...

python/pyarrow-stubs/pyarrow/compute.pyi

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ def first(
318318
memory_pool: lib.MemoryPool | None = None,
319319
) -> _ScalarT: ...
320320

321+
last = _clone_signature(first)
321322

322323
def first_last(
323324
array: lib.Array[Any] | lib.ChunkedArray[Any] | list[Any],
@@ -546,6 +547,7 @@ def exp(
546547
_FloatArrayT | lib.DoubleArray | _FloatScalarT | lib.DoubleScalar | Expression): ...
547548

548549

550+
expm1 = _clone_signature(exp)
549551
multiply = _clone_signature(add)
550552
multiply_checked = _clone_signature(add)
551553

@@ -741,10 +743,12 @@ logb_checked = _clone_signature(logb)
741743
acos = _clone_signature(ln)
742744
acos_checked = _clone_signature(ln)
743745
acosh = _clone_signature(ln)
746+
acosh_checked = _clone_signature(ln)
744747
asin = _clone_signature(ln)
745748
asin_checked = _clone_signature(ln)
746749
asinh = _clone_signature(ln)
747750
atan = _clone_signature(ln)
751+
# Base alias first, then its checked variant, matching acos/asin above.
atanh = _clone_signature(ln)
atanh_checked = _clone_signature(ln)
749753
cos = _clone_signature(ln)
750754
cos_checked = _clone_signature(ln)
@@ -1171,6 +1175,13 @@ def index_in(
11711175
memory_pool: lib.MemoryPool | None = None,
11721176
) -> lib.Int32Scalar | lib.Int32Array | Expression: ...
11731177

1178+
# Binary form of index_in: `values` and `value_set` are positional-only,
# `memory_pool` is keyword-only, and the result is Int32-typed (scalar, array,
# or a deferred Expression).
def index_in_meta_binary(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
    value_set: lib.Array | lib.ChunkedArray | Expression,
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.Int32Scalar | lib.Int32Array | Expression: ...
11741185

11751186
def is_in(
11761187
values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
@@ -1183,6 +1194,7 @@ def is_in(
11831194
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
11841195

11851196

1197+
# Declared explicitly instead of cloning index_in_meta_binary: the parameters
# are the same, but a clone would also copy the Int32 return type. Membership
# tests produce booleans, as the `is_in` stub above declares.
def is_in_meta_binary(
    values: lib.Scalar | lib.Array | lib.ChunkedArray | Expression,
    value_set: lib.Array | lib.ChunkedArray | Expression,
    /,
    *,
    memory_pool: lib.MemoryPool | None = None,
) -> lib.BooleanScalar | lib.BooleanArray | Expression: ...
11861198
match_like = _clone_signature(ends_with)
11871199
match_substring = _clone_signature(ends_with)
11881200
match_substring_regex = _clone_signature(ends_with)

python/pyarrow-stubs/pyarrow/parquet/core.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ class ParquetWriter:
198198
write_page_checksum: bool = False,
199199
sorting_columns: Sequence[SortingColumn] | None = None,
200200
store_decimal_as_integer: bool = False,
201+
max_rows_per_page: int | None = None,
201202
**options,
202203
) -> None: ...
203204
def __enter__(self) -> Self: ...

python/pyarrow-stubs/pyarrow/parquet/encryption.pyi

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ from pyarrow._parquet_encryption import (
1919
CryptoFactory,
2020
DecryptionConfiguration,
2121
EncryptionConfiguration,
22+
FileSystemKeyMaterialStore,
2223
KmsClient,
2324
KmsConnectionConfig,
2425
)
@@ -27,6 +28,7 @@ __all__ = [
2728
"CryptoFactory",
2829
"DecryptionConfiguration",
2930
"EncryptionConfiguration",
31+
"FileSystemKeyMaterialStore",
3032
"KmsClient",
3133
"KmsConnectionConfig",
3234
]

python/pyarrow/parquet/core.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1908,7 +1908,9 @@ def read_table(source, *, columns=None, use_threads=True,
19081908

19091909
filesystem, path = _resolve_filesystem_and_path(source, filesystem)
19101910
if filesystem is not None:
1911-
if not filesystem.get_file_info(path).is_file:
1911+
file_info = filesystem.get_file_info(path)
1912+
assert isinstance(file_info, FileInfo)
1913+
if not file_info.is_file:
19121914
raise ValueError(
19131915
"the 'source' argument should be "
19141916
"an existing parquet file and not a directory "

python/pyarrow/parquet/encryption.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@
2020
EncryptionConfiguration,
2121
DecryptionConfiguration,
2222
KmsConnectionConfig,
23-
KmsClient)
23+
KmsClient,
24+
FileSystemKeyMaterialStore)

python/pyarrow/tests/parquet/test_encryption.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ def test_encrypted_parquet_write_read_external(tempdir, data_table,
553553
result_table = read_encrypted_parquet(
554554
path, decryption_config, kms_connection_config, crypto_factory,
555555
internal_key_material=False)
556-
store = pa._parquet_encryption.FileSystemKeyMaterialStore.for_file(path)
556+
store = pe.FileSystemKeyMaterialStore.for_file(path)
557557

558558
assert len(key_ids := store.get_key_id_set()) == (
559559
len(external_encryption_config.column_keys[COL_KEY_NAME]) + 1)

python/pyarrow/tests/test_flight.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ class EchoTableStreamFlightServer(EchoFlightServer):
266266
"""An echo server that streams the whole table."""
267267

268268
def do_get(self, context, ticket):
    """Stream the stored message back as a single-element stream."""
    message = self.last_message
    # Fail loudly (and narrow the type for checkers) if nothing was stored.
    assert message is not None
    return flight.GeneratorStream(message.schema, [message])
@@ -283,6 +284,7 @@ class EchoRecordBatchReaderStreamFlightServer(EchoFlightServer):
283284
"""An echo server that streams the whole table as a RecordBatchReader."""
284285

285286
def do_get(self, context, ticket):
    """Stream the stored message back, wrapped via ``to_reader()``."""
    message = self.last_message
    # Fail loudly (and narrow the type for checkers) if nothing was stored.
    assert message is not None
    return flight.GeneratorStream(message.schema, [message.to_reader()])

0 commit comments

Comments
 (0)