Skip to content

Commit c798a20

Browse files
authored
Merge pull request #1011 from morotti/rmorotti-compatibility
compatibility with newer numpy/pandas
2 parents 0977c33 + 4badce0 commit c798a20

39 files changed

+110
-193
lines changed

.circleci/config.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ defaults: &defaults
5151
virtualenv ci
5252
. ci/bin/activate
5353
python setup.py develop
54-
pip install --upgrade pip
54+
pip install --upgrade pip==21.0.1 setuptools==58.0.4
5555
pip install pytest-server-fixtures[mongodb]
56-
echo y | pip uninstall tzlocal # because circleci will install >5.0b
57-
pip install tzlocal==4.2 # works for python 3.6 and 3.8
56+
pip uninstall -y tzlocal # because circleci will install >5.0b
57+
pip install $TEST_PACKAGES
5858
pip freeze
5959
python --version
6060
# Save dependency cache
@@ -95,6 +95,7 @@ jobs:
9595
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_6
9696
VERSION: $VERSION
9797
IMAGE_NAME: mangroup/arctic
98+
TEST_PACKAGES: "tzlocal==4.2 numpy==1.14.6 pandas==0.25.3"
9899
working_directory: ~/arctic_3_6
99100
docker:
100101
- image: cimg/python:3.6-node
@@ -107,6 +108,7 @@ jobs:
107108
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_7
108109
VERSION: $VERSION
109110
IMAGE_NAME: mangroup/arctic
111+
TEST_PACKAGES: "tzlocal==4.2 numpy==1.18.5 pandas==1.0.5"
110112
working_directory: ~/arctic_3_7
111113
docker:
112114
- image: cimg/python:3.7-node
@@ -119,6 +121,7 @@ jobs:
119121
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_8
120122
VERSION: $VERSION
121123
IMAGE_NAME: mangroup/arctic
124+
TEST_PACKAGES: "tzlocal==4.2 numpy==1.21.6 pandas==1.3.5"
122125
working_directory: ~/arctic_3_8
123126
docker:
124127
- image: cimg/python:3.8-node

CHANGES.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
## Changelog
22

3+
### HEAD
4+
* Compatibility: update pinning from pandas<1.1 to pandas<2
5+
* Compatibility: update pinning from numpy<1.19 to numpy<2
6+
* Build: specify which version of python/numpy/pandas to test
7+
* Bugfix: #887 #928 resolve import error on pandas.Panel, Panel was removed in pandas 1.0
8+
* Bugfix: fix pandas get_timezone() argument cannot be None
9+
* Bugfix: fix np.float/np.int/np.bool deprecations
10+
* Bugfix: replace deprecated numpy tostring() with tobytes()
11+
* Bugfix: numpy.testing.utils is deprecated
12+
* Bugfix: pandas.util.testing is deprecated
13+
* Cleanup: remove async_benchmark
14+
315
### 1.81.2 (2023-05-11)
416
* Feature: #1000, #1001, #1002, #1003, #1007, #1008 ArcticDB docs, links etc
517
* Bugfix: #1009 force CircleCI to install tzlocal==4.2

arctic/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
""" The Arctic TimeSeries and Tick store."""
22

3+
import pandas
4+
35
from .arctic import Arctic, register_library_type
46
from .arctic import VERSION_STORE, TICK_STORE, CHUNK_STORE
57
from .store._ndarray_store import NdarrayStore
@@ -24,5 +26,8 @@
2426

2527
register_versioned_storage(PandasDataFrameStore)
2628
register_versioned_storage(PandasSeriesStore)
27-
register_versioned_storage(PandasPanelStore)
2829
register_versioned_storage(NdarrayStore)
30+
31+
if pandas.__version__.startswith("0."):
32+
# Panel is removed in pandas 1
33+
register_versioned_storage(PandasPanelStore)

arctic/_util.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@
33
import numpy as np
44
import pymongo
55
from pandas import DataFrame
6-
try:
7-
from pandas.testing import assert_frame_equal
8-
except ImportError:
9-
from pandas.util.testing import assert_frame_equal
6+
from pandas.testing import assert_frame_equal
107

118
from ._config import FW_POINTERS_CONFIG_KEY, FwPointersCfg
129

arctic/serialization/incremental.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
log = logging.getLogger(__name__)
2020

2121

22-
def incremental_checksum(item, curr_sha=None, is_bytes=False):
22+
def incremental_checksum(item, curr_sha=None):
2323
curr_sha = hashlib.sha1() if curr_sha is None else curr_sha
24-
curr_sha.update(item if is_bytes else item.tostring())
24+
curr_sha.update(item)
2525
return curr_sha
2626

2727

@@ -182,7 +182,7 @@ def checksum(self, from_idx, to_idx):
182182
for chunk_bytes, dtype in self.generator_bytes(from_idx=from_idx, to_idx=to_idx):
183183
# TODO: what about compress_array here in batches?
184184
compressed_chunk = compress(chunk_bytes)
185-
total_sha = incremental_checksum(compressed_chunk, curr_sha=total_sha, is_bytes=True)
185+
total_sha = incremental_checksum(compressed_chunk, curr_sha=total_sha)
186186
self._checksum = Binary(total_sha.digest())
187187
return self._checksum
188188

@@ -220,7 +220,7 @@ def _generator(self, from_idx, to_idx, get_bytes=False):
220220
forced_dtype=self.dtype if self._has_string_object else None)
221221

222222
# Let the gc collect the intermediate serialized chunk as early as possible
223-
chunk = chunk.tostring() if chunk is not None and get_bytes else chunk
223+
chunk = chunk.tobytes() if chunk is not None and get_bytes else chunk
224224

225225
yield chunk, self.dtype, from_idx, curr_stop
226226
from_idx = curr_stop

arctic/serialization/numpy_arrays.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ def docify(self, df):
115115
arr, mask = self._convert_types(df[c].values)
116116
dtypes[str(c)] = arr.dtype.str
117117
if mask is not None:
118-
masks[str(c)] = Binary(compress(mask.tostring()))
119-
arrays.append(arr.tostring())
118+
masks[str(c)] = Binary(compress(mask.tobytes()))
119+
arrays.append(arr.tobytes())
120120
except Exception as e:
121121
typ = infer_dtype(df[c], skipna=False)
122122
msg = "Column '{}' type is {}".format(str(c), typ)

arctic/serialization/numpy_records.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,23 @@ def _multi_index_to_records(index, empty_index):
6363
log.info("Level in MultiIndex has no name, defaulting to %s" % index_names[i])
6464
index_tz = []
6565
for i in index.levels:
66-
if isinstance(i, DatetimeIndex):
67-
tmp = get_timezone(i.tz)
68-
index_tz.append(str(tmp) if tmp is not None else None)
66+
if isinstance(i, DatetimeIndex) and i.tz is not None:
67+
if PD_VER < '1.1.0':
68+
tmp = get_timezone(i.tz)
69+
index_tz.append(str(tmp))
70+
else:
71+
# see implementation of get_timezone() in github
72+
# https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/_libs/tslibs/timezones.pyx
73+
# https://github.com/pandas-dev/pandas/blob/v1.0.5/pandas/_libs/tslibs/timezones.pyx
74+
# https://github.com/pandas-dev/pandas/blob/v1.3.5/pandas/_libs/tslibs/timezones.pyx
75+
# get_timezone(mktz("UTC")) returns 'dateutil//usr/share/zoneinfo/UTC' string before pandas 1.3
76+
# get_timezone(mktz("UTC")) returns the timezone object in pandas 1.3+
77+
# we need a stable string to serialize and to deserialize,
78+
# so let's always encode the filename like pandas is doing for standard datetime objects.
79+
# NOTE: contrary to what it looks like, the path is not dependent on the machine,
80+
# there's logic in pandas and datetime to strip the directory prefix.
81+
tmp = "dateutil/" + i.tz._filename
82+
index_tz.append(str(tmp))
6983
else:
7084
index_tz.append(None)
7185

arctic/store/_ndarray_store.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ def append(self, arctic_lib, version, symbol, item, previous_version, dtype=None
445445
self._do_append(collection, version, symbol, item, previous_version, dirty_append)
446446

447447
def _do_append(self, collection, version, symbol, item, previous_version, dirty_append):
448-
data = item.tostring()
448+
data = item.tobytes()
449449
# Compatibility with Arctic 1.22.0 that didn't write base_sha into the version document
450450
version['base_sha'] = previous_version.get('base_sha', Binary(b''))
451451
version['up_to'] = previous_version['up_to'] + len(item)
@@ -596,7 +596,7 @@ def check_written(collection, symbol, version):
596596

597597
def checksum(self, item):
598598
sha = hashlib.sha1()
599-
sha.update(item.tostring())
599+
sha.update(item.tobytes())
600600
return Binary(sha.digest())
601601

602602
def write(self, arctic_lib, version, symbol, item, previous_version, dtype=None):
@@ -656,7 +656,7 @@ def _do_write(self, collection, version, symbol, item, previous_version, segment
656656

657657
# Compress
658658
idxs = range(int(np.ceil(float(length) / rows_per_chunk)))
659-
chunks = [(item[i * rows_per_chunk: (i + 1) * rows_per_chunk]).tostring() for i in idxs]
659+
chunks = [(item[i * rows_per_chunk: (i + 1) * rows_per_chunk]).tobytes() for i in idxs]
660660
compressed_chunks = compress_array(chunks)
661661

662662
# Write

arctic/store/_pandas_ndarray_store.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,6 @@
44
import numpy as np
55
from bson.binary import Binary
66
from pandas import DataFrame, Series
7-
try:
8-
# TODO delete when early Pandas version support removed
9-
import warnings
10-
warnings.filterwarnings("ignore", category=FutureWarning)
11-
from pandas import Panel
12-
except ImportError:
13-
pass
147

158
from arctic._util import NP_OBJECT_DTYPE
169
from arctic.serialization.numpy_records import SeriesSerializer, DataFrameSerializer
@@ -62,7 +55,7 @@ def _segment_index(self, recarr, existing_index, start, new_segments):
6255
if start > 0:
6356
existing_index_arr = existing_index_arr[existing_index_arr['index'] < start]
6457
index = np.concatenate((existing_index_arr, index))
65-
return Binary(compress(index.tostring()))
58+
return Binary(compress(index.tobytes()))
6659
elif existing_index:
6760
raise ArcticException("Could not find datetime64 index in item but existing data contains one")
6861
return None
@@ -220,6 +213,8 @@ class PandasPanelStore(PandasDataFrameStore):
220213

221214
@staticmethod
222215
def can_write_type(data):
216+
# late import, Panel is removed in pandas 1
217+
from pandas import Panel
223218
return isinstance(data, Panel)
224219

225220
def can_write(self, version, symbol, data):

arctic/store/_version_store_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ def _split_arrs(array_2d, slices):
2020
but avoids fancy indexing
2121
"""
2222
if len(array_2d) == 0:
23-
return np.empty(0, dtype=np.object)
23+
return np.empty(0, dtype=object)
2424

25-
rtn = np.empty(len(slices) + 1, dtype=np.object)
25+
rtn = np.empty(len(slices) + 1, dtype=object)
2626
start = 0
2727
for i, s in enumerate(slices):
2828
rtn[i] = array_2d[start:s]

0 commit comments

Comments
 (0)