Skip to content

Commit 7ca3af7

Browse files
authored
Add exception when trying to write an index or a column with a boolean name (#2667)
#### Reference Issues/PRs Monday ref: 9911125848 #### What does this implement or fix? #### Any other comments? #### Checklist <details> <summary> Checklist for code changes... </summary> - [ ] Have you updated the relevant docstrings, documentation and copyright notice? - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)? - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)? - [ ] Are API changes highlighted in the PR description? - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes? </details> <!-- Thanks for contributing a Pull Request to ArcticDB! Please ensure you have taken a look at: - ArcticDB's Code of Conduct: https://github.com/man-group/ArcticDB/blob/master/CODE_OF_CONDUCT.md - ArcticDB's Contribution Licensing: https://github.com/man-group/ArcticDB/blob/master/docs/mkdocs/docs/technical/contributing.md#contribution-licensing -->
1 parent 62e9b31 commit 7ca3af7

File tree

5 files changed

+154
-18
lines changed

5 files changed

+154
-18
lines changed

python/arcticdb/version_store/_normalization.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -361,10 +361,7 @@ def _normalize_single_index(
361361
return [], []
362362
elif isinstance(index, RangeIndex):
363363
if index.name:
364-
if not isinstance(index.name, int) and not isinstance(index.name, str):
365-
raise NormalizationException(
366-
f"Index name must be a string or an int, received {index.name} of type {type(index.name)}"
367-
)
364+
_check_valid_name(index.name)
368365
if isinstance(index.name, int):
369366
index_norm.is_int = True
370367
index_norm.name = str(index.name)
@@ -400,10 +397,7 @@ def _normalize_single_index(
400397
if index_tz is not None:
401398
index_norm.tz = _ensure_str_timezone(index_tz)
402399

403-
if not isinstance(index_names[0], int) and not isinstance(index_names[0], str):
404-
raise NormalizationException(
405-
f"Index name must be a string or an int, received {index_names[0]} of type {type(index_names[0])}"
406-
)
400+
_check_valid_name(index_names[0])
407401
# Currently, we only support a single index column
408402
# when we support multi-index, we will need to implement a similar logic to the one in _normalize_columns_names
409403
# in the mean time, we will cast all other index names to string, so we don't crash in the cpp layer
@@ -498,6 +492,14 @@ def _denormalize_columns(item, norm_meta, idx_type, n_indexes):
498492
return columns, denormed_columns, data
499493

500494

495+
def _check_valid_name(name):
496+
# bools are a subclass of int, so we need to check for them explicitly
497+
if isinstance(name, bool) or not isinstance(name, (str, int)):
498+
raise NormalizationException(
499+
f"Column and index names must be of type str or int, received {name} of type {type(name)}"
500+
)
501+
502+
501503
def _normalize_columns_names(columns_names, index_names, norm_meta, dynamic_schema=False):
502504
counter = Counter(columns_names)
503505
for idx in range(len(columns_names)):
@@ -510,10 +512,7 @@ def _normalize_columns_names(columns_names, index_names, norm_meta, dynamic_sche
510512
columns_names[idx] = new_name
511513
continue
512514

513-
if not isinstance(col, str) and not isinstance(col, int):
514-
raise NormalizationException(
515-
f"Column names must be of type str or int, received {col} of type {type(col)} on column number {idx}"
516-
)
515+
_check_valid_name(col)
517516

518517
col_str = str(col)
519518
columns_names[idx] = col_str

python/tests/hypothesis/arcticdb/test_append.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@
33
import numpy as np
44
import pytest
55
from arcticdb.version_store import NativeVersionStore
6-
from arcticdb_ext.exceptions import (
7-
InternalException,
8-
NormalizationException,
9-
)
6+
from arcticdb_ext.exceptions import InternalException, NormalizationException, ArcticException as ArcticNativeException
107
from arcticdb_ext.version_store import StreamDescriptorMismatch
118
from arcticdb_ext import set_config_int
129
from hypothesis import given, assume, settings, strategies as st
@@ -287,3 +284,21 @@ def test_regular_append_dynamic_schema_named_index(
287284
lib.append(sym, df_1)
288285

289286
assert "date" in str(exception_info.value)
287+
288+
289+
@pytest.mark.parametrize(
290+
"idx", [pd.date_range(pd.Timestamp("2020-01-01"), periods=3), pd.RangeIndex(start=0, stop=3, step=1)]
291+
)
292+
def test_append_bool_named_col(lmdb_version_store_dynamic_schema, idx):
293+
symbol = "bad_append"
294+
295+
initial = pd.DataFrame({"col": [1, 2, 3]}, index=idx)
296+
lmdb_version_store_dynamic_schema.write(symbol, initial)
297+
298+
bad_df = pd.DataFrame({True: [4, 5, 6]}, index=idx)
299+
300+
# The normalization exception is getting reraised as an ArcticNativeException so we check for that
301+
with pytest.raises(ArcticNativeException):
302+
lmdb_version_store_dynamic_schema.append(symbol, bad_df)
303+
304+
assert_frame_equal(lmdb_version_store_dynamic_schema.read(symbol).data, initial)

python/tests/integration/arcticdb/test_update.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
import numpy as np
1919
import string
2020

21+
from arcticdb_ext.exceptions import ArcticException as ArcticNativeException
22+
2123
from arcticdb.options import LibraryOptions
2224
from arcticdb.util._versions import IS_PANDAS_ONE
2325
from arcticdb.util.arctic_simulator import ArcticSymbolSimulator
@@ -555,3 +557,21 @@ def test_update_batch_different_updates_dynamic_schema(custom_library):
555557
ArcticSymbolSimulator.assert_frame_equal_rebuild_index_first(
556558
expected_results[result.symbol], read_data[result.symbol].data
557559
)
560+
561+
562+
@pytest.mark.parametrize(
563+
"idx", [pd.date_range(pd.Timestamp("2020-01-01"), periods=3), pd.RangeIndex(start=0, stop=3, step=1)]
564+
)
565+
def test_update_bool_named_col(lmdb_version_store_dynamic_schema, idx):
566+
symbol = "bad_append"
567+
568+
initial = pd.DataFrame({"col": [1, 2, 3]}, index=idx)
569+
lmdb_version_store_dynamic_schema.write(symbol, initial)
570+
571+
bad_df = pd.DataFrame({True: [4, 5, 6]}, index=idx)
572+
573+
# The normalization exception is getting reraised as an ArcticNativeException so we check for that
574+
with pytest.raises(ArcticNativeException):
575+
lmdb_version_store_dynamic_schema.update(symbol, bad_df)
576+
577+
assert_frame_equal(lmdb_version_store_dynamic_schema.read(symbol).data, initial)

python/tests/unit/arcticdb/version_store/test_recursive_normalizers.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
from arcticdb.exceptions import ArcticDbNotYetImplemented
1616
from arcticdb.util.venv import CompatLibrary
1717
from arcticdb.util.test import assert_frame_equal
18-
from arcticdb.exceptions import DataTooNestedException, UnsupportedKeyInDictionary
18+
from arcticdb.exceptions import (
19+
DataTooNestedException,
20+
UnsupportedKeyInDictionary,
21+
ArcticException as ArcticNativeException,
22+
)
1923
from arcticdb_ext.storage import KeyType
2024
from arcticdb_ext.version_store import NoSuchVersionException
2125
import arcticdb_ext.stream as adb_stream
@@ -909,3 +913,52 @@ def test_write_new_read_old(self, old_venv_and_arctic_uri, lib_name):
909913
],
910914
dfs=dfs,
911915
)
916+
917+
918+
def test_write_recursive_norm_bool_named_key(lmdb_version_store):
919+
symbol = "bad_write"
920+
ts = pd.Timestamp("2020-01-01")
921+
922+
df = pd.DataFrame({"col": [1, 2, 3]}, index=pd.date_range(ts, periods=3))
923+
924+
data = {True: df, "l": [1, 2, 3], "m": {"n": "o", "p": df}, "p": df}
925+
926+
lmdb_version_store.write(symbol, data, recursive_normalizers=True)
927+
928+
res = lmdb_version_store.read(symbol).data
929+
assert_frame_equal(res["True"], df)
930+
931+
932+
def test_write_recursive_norm_bool_named_columns(lmdb_version_store):
933+
symbol = "bad_write"
934+
ts = pd.Timestamp("2020-01-01")
935+
936+
df = pd.DataFrame({True: [1, 2, 3]}, index=pd.date_range(ts, periods=3))
937+
938+
data = {"c": df, "l": [1, 2, 3], "m": {"n": "o", "p": df}, "p": df}
939+
940+
# The normalization exception is getting reraised as an ArcticNativeException so we check for that
941+
with pytest.raises(ArcticNativeException):
942+
lmdb_version_store.write(symbol, data, recursive_normalizers=True)
943+
944+
assert lmdb_version_store.list_symbols() == []
945+
assert lmdb_version_store.has_symbol(symbol) is False
946+
947+
948+
@pytest.mark.parametrize(
949+
"idx", [pd.date_range(pd.Timestamp("2020-01-01"), periods=3), pd.RangeIndex(start=0, stop=3, step=1)]
950+
)
951+
def test_write_recursive_norm_bool_named_index(lmdb_version_store, idx):
952+
symbol = "bad_write"
953+
954+
df = pd.DataFrame({"col": [1, 2, 3]}, index=idx)
955+
df.index.name = True
956+
957+
data = {"col": df, "l": [1, 2, 3], "m": {"n": "o", "p": df}, "p": df}
958+
959+
# The normalization exception is getting reraised as an ArcticNativeException so we check for that
960+
with pytest.raises(ArcticNativeException):
961+
lmdb_version_store.write(symbol, data, recursive_normalizers=True)
962+
963+
assert lmdb_version_store.list_symbols() == []
964+
assert lmdb_version_store.has_symbol(symbol) is False

python/tests/unit/arcticdb/version_store/test_write.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
import pandas as pd
1111
import pytest
12-
from arcticdb.exceptions import SortingException
12+
from arcticdb_ext.exceptions import SortingException, ArcticException as ArcticNativeException
1313
from arcticdb.util._versions import IS_PANDAS_TWO
1414
from arcticdb.util.test import assert_frame_equal
1515
from pandas import MultiIndex
@@ -160,3 +160,52 @@ def test_write_only_nan_column(self, lmdb_version_store, dtype):
160160
lib.write(sym, pd.DataFrame({"a": [np.nan]}, dtype=dtype))
161161
data = lib.read(sym).data
162162
assert_frame_equal(data, pd.DataFrame({"a": [np.nan]}, dtype=dtype))
163+
164+
165+
def test_write_bool_named_columns(lmdb_version_store):
166+
symbol = "bad_write"
167+
ts = pd.Timestamp("2020-01-01")
168+
169+
df = pd.DataFrame({True: [1, 2, 3]}, index=pd.date_range(ts, periods=3))
170+
171+
# The normalization exception is getting reraised as an ArcticNativeException so we check for that
172+
with pytest.raises(ArcticNativeException):
173+
lmdb_version_store.write(symbol, df)
174+
175+
assert lmdb_version_store.list_symbols() == []
176+
assert lmdb_version_store.has_symbol(symbol) is False
177+
178+
179+
@pytest.mark.parametrize(
180+
"idx", [pd.date_range(pd.Timestamp("2020-01-01"), periods=3), pd.RangeIndex(start=0, stop=3, step=1)]
181+
)
182+
def test_write_bool_named_index(lmdb_version_store, idx):
183+
symbol = "bad_write"
184+
185+
df = pd.DataFrame({"col": [1, 2, 3]}, index=idx)
186+
df.index.name = True
187+
188+
# The normalization exception is getting reraised as an ArcticNativeException so we check for that
189+
with pytest.raises(ArcticNativeException):
190+
lmdb_version_store.write(symbol, df)
191+
192+
assert lmdb_version_store.list_symbols() == []
193+
assert lmdb_version_store.has_symbol(symbol) is False
194+
195+
196+
@pytest.mark.parametrize(
197+
"idx", [pd.date_range(pd.Timestamp("2020-01-01"), periods=3), pd.RangeIndex(start=0, stop=3, step=1)]
198+
)
199+
@pytest.mark.parametrize("idx_names", [["index", True], [True, "index"]])
200+
def test_write_bool_named_multi_index(lmdb_version_store, idx, idx_names):
201+
symbol = "bad_write"
202+
203+
df = pd.DataFrame({"col": [1, 2, 3]}, index=pd.MultiIndex.from_arrays([idx, idx], names=idx_names))
204+
205+
lmdb_version_store.write(symbol, df)
206+
207+
# We do allow for the boolean index names in multiindex and they get normalized to strings
208+
# so this just tests the current behaviour and that we can read back the data correctly
209+
df.index.names = [str(n) for n in idx_names]
210+
211+
assert_frame_equal(lmdb_version_store.read(symbol).data, df)

0 commit comments

Comments
 (0)