Skip to content

Commit 0ae918e

Browse files
authored
Merge pull request #55 from i2mint/dev
Dev
2 parents cbae4bd + de33d49 commit 0ae918e

35 files changed

+4175
-1057
lines changed

py2store/__init__.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,32 @@
44
file_sep = os.path.sep
55

66
# Imports to be able to easily get started...
7-
from py2store.base import KvCollection, KvReader, KvPersister, Reader, Persister
8-
from py2store.stores.local_store import LocalStore, LocalBinaryStore, LocalTextStore, PickleStore
7+
from py2store.base import Collection, KvReader, KvPersister, Reader, Persister
8+
9+
from py2store.stores.local_store import LocalStore, LocalBinaryStore, LocalTextStore, LocalPickleStore, LocalJsonStore
910
from py2store.stores.local_store import QuickStore, QuickBinaryStore, QuickTextStore, QuickJsonStore, QuickPickleStore
11+
12+
from py2store.misc import MiscGetter, MiscGetterAndSetter, misc_objs, misc_objs_get, get_obj, set_obj
1013
from py2store.base import Store
11-
from py2store.trans import wrap_kvs
12-
from py2store.access import user_configs_dict, user_configs, user_defaults_dict, user_defaults
14+
from py2store.trans import wrap_kvs, disable_delitem, disable_setitem, mk_read_only, kv_wrap, cache_iter, filtered_iter
15+
from py2store.access import user_configs_dict, user_configs, user_defaults_dict, user_defaults, mystores
16+
17+
from py2store.stores.local_store import PickleStore # consider deprecating and use LocalPickleStore instead?
18+
from py2store.persisters.local_files import ZipReader, FilesOfZip, ZipFilesReader
1319

1420
with ModuleNotFoundIgnore():
1521
from py2store.stores.s3_store import S3BinaryStore, S3TextStore, S3PickleStore
1622
with ModuleNotFoundIgnore():
1723
from py2store.stores.mongo_store import MongoStore, MongoTupleKeyStore, MongoAnyKeyStore
24+
25+
26+
def kvhead(store):
27+
"""Get the first item of a kv store"""
28+
for k in store:
29+
return k, store[k]
30+
31+
32+
def ihead(store):
    """Return the first element produced by iterating ``store``, or ``None`` if it produces nothing."""
    return next(iter(store), None)

py2store/access.py

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
"""
2-
The logic to allow configurations (and defaults) to be parametrized by external environmental variables and files.
2+
Utils to load stores from store specifications.
3+
Includes the logic to allow configurations (and defaults) to be parametrized by external environmental
4+
variables and files.
5+
6+
Every data-sourced problem has its problem-relevant stores. Once you get your stores right, along with the
7+
right access credentials, indexing, serialization, caching, filtering etc. you'd like to be able to name, save
8+
and/or share this specification, and easily get access to it later on.
9+
10+
Here are tools to help you out.
311
412
There are two main key-value stores: One for configurations the user wants to reuse, and the other for the user's
513
desired defaults. Both have the same structure:
@@ -15,9 +23,18 @@
1523
respectively.
1624
"""
1725
import os
26+
import importlib
1827
from warnings import warn
28+
from functools import reduce
1929
from py2store.util import DictAttr, str_to_var_str
2030

31+
FAK = '$fak'
32+
33+
34+
# TODO: Make a config_utils.py module to centralize config tools (configs for access is just one -- serializers another)
35+
# TODO: Integrate (external because not standard lib) other safer tools for secrets, such as:
36+
# https://github.com/SimpleLegal/pocket_protector
37+
2138

2239
def getenv(name, default=None):
2340
"""Like os.getenv, but removes a suffix \\r character if present (problem with some env var systems)"""
@@ -28,10 +45,105 @@ def getenv(name, default=None):
2845
return v
2946

3047

48+
def assert_callable(f: callable) -> callable:
    """Return ``f`` unchanged if it is callable; otherwise raise.

    Args:
        f: The object to check.

    Returns:
        The same object ``f``.

    Raises:
        AssertionError: If ``f`` is not callable.
    """
    # Raised explicitly (rather than through an ``assert`` statement) so that the check is not
    # stripped when Python runs with -O. The exception type is kept as AssertionError.
    if not callable(f):
        raise AssertionError(f"Is not callable: {f}")
    return f
51+
52+
53+
def dotpath_to_obj(dotpath):
    """Load and return the object referenced by a ``DOTPATH_TO_MODULE.OBJ_NAME`` string.

    Everything before the last dot is imported as a module; what follows the last dot is
    fetched from that module as an attribute.
    """
    module_dotpath, _, obj_name = dotpath.rpartition('.')
    module = importlib.import_module(module_dotpath)
    return getattr(module, obj_name)
57+
58+
59+
def dotpath_to_func(f: (str, callable)) -> callable:
    """Load and return the function referenced by ``f``.

    Args:
        f: Either a callable (returned as is, after the callable check), or a string.
            A string containing a dot is treated as a ``DOTPATH_TO_MODULE.FUNC_NAME`` dotpath.
            A string without a dot is looked up as an attribute of the ``py2store`` package.

    Returns:
        The resolved object, after it has been passed through ``assert_callable``.
    """
    if isinstance(f, str):
        if '.' in f:
            # Same split/import/getattr logic as dotpath_to_obj, so reuse it instead of duplicating.
            f = dotpath_to_obj(f)
        else:
            f = getattr(importlib.import_module('py2store'), f)

    return assert_callable(f)
72+
73+
74+
def compose(*functions):
    """Make a function that is the composition of the input functions.

    The returned single-argument function applies the LAST given function first and the FIRST given
    function last, i.e. ``compose(f, g)(x) == f(g(x))``. With no functions, it returns its input unchanged.
    """

    def composed(x):
        result = x
        for func in reversed(functions):
            result = func(result)
        return result

    return composed
77+
78+
79+
def dflt_func_loader(f) -> callable:
    """Load and return the function referenced by ``f``.

    ``f`` may be a callable, a string (handed to ``dotpath_to_func``), or an iterable of such
    specifications. In the iterable case each element is loaded recursively and the results are
    combined with ``compose``.
    """
    is_single_spec = isinstance(f, str) or callable(f)
    if not is_single_spec:
        # Anything else is taken to be an iterable of function specs.
        return compose(*map(dflt_func_loader, f))
    return dotpath_to_func(f)
87+
88+
89+
def _fakit(f: callable, a: (tuple, list), k: dict):
90+
return f(*(a or ()), **(k or {}))
91+
92+
93+
def fakit_from_dict(d, func_loader=assert_callable):
    """Execute a fak given as a dict with an 'f' key and optional 'a' and 'k' keys.

    ``func_loader`` is applied to ``d['f']`` to obtain the callable; 'a' defaults to ``()`` and
    'k' to ``{}``.
    """
    # NOTE(review): the default loader here is assert_callable, whereas fakit_from_tuple and fakit
    # default to dflt_func_loader -- confirm that this difference is intended.
    func = func_loader(d['f'])
    args = d.get('a', ())
    kwargs = d.get('k', {})
    return _fakit(func, a=args, k=kwargs)
95+
96+
97+
def fakit_from_tuple(t: (tuple, list), func_loader: callable = dflt_func_loader):
    """Execute a fak given as a tuple or list of length 1, 2 or 3.

    Args:
        t: ``(f,)``, ``(f, spec)`` or ``(f, spec, spec)``. Each ``spec`` is either a dict (taken as the
            keyword arguments) or a tuple/list (taken as the positional arguments). At most one
            non-empty spec of each kind is accepted.
        func_loader: Applied to ``t[0]`` to obtain the callable.

    Returns:
        The result of calling the loaded function with the given positional and keyword arguments.

    Raises:
        AssertionError: If ``t`` has the wrong length, a spec has an unsupported type, or two
            (non-empty) specs of the same kind are given.
    """
    # Validate the length BEFORE touching t[0]: an empty spec then fails with the intended message
    # instead of an IndexError, and no function is loaded for a spec that is rejected anyway.
    assert len(t) in {1, 2, 3}, "A tuple fak must be of length 1, 2, or 3. No more, no less."
    f = func_loader(t[0])
    a = ()
    k = {}
    for spec in t[1:]:
        if isinstance(spec, dict):
            assert not k, "can only have one kwargs"
            k = spec
        else:
            assert isinstance(spec, (tuple, list)), "argument specs should be dict, tuple, or list"
            assert not a, "can only have one args"
            a = spec
    return _fakit(f, a, k)
117+
118+
119+
def fakit(fak, func_loader=dflt_func_loader):
    """Execute a fak with given f, a, k and function loader.

    Essentially returns func_loader(f)(*a, **k)

    Args:
        fak: A (f, a, k) specification. Could be a tuple or a dict (with 'f', 'a', 'k' keys). All but f are optional.
        func_loader: A function returning a function. This is where you specify any validation of func specification f,
            and/or how to get a callable from it.

    Returns: A python object.
    """
    # Dispatch on the container type of the specification.
    if not isinstance(fak, dict):
        assert isinstance(fak, (tuple, list)), "fak should be dict, tuple, or list"
        return fakit_from_tuple(fak, func_loader=func_loader)
    return fakit_from_dict(fak, func_loader=func_loader)
137+
138+
139+
fakit.from_dict = fakit_from_dict
140+
fakit.from_tuple = fakit_from_tuple
141+
31142
user_configs_dict = {}
32143
user_defaults_dict = {}
33144
user_configs = None
34145
user_defaults = None
146+
mystores = None
35147

36148
try:
37149
import json
@@ -53,6 +165,44 @@ def directory_json_items():
53165

54166
user_configs = DictAttr(**{k: v for k, v in directory_json_items()})
55167

168+
from py2store.base import KvStore
169+
from py2store.stores.local_store import LocalJsonStore
170+
from py2store.trans import wrap_kvs
171+
172+
173+
class MyStores(KvStore):
    """A KvStore whose stored data are specifications that get turned into objects on read.

    Data containing a '$fak' entry is executed with ``fakit`` (using ``func_loader`` to resolve the
    function); any other data is rejected with a ValueError.
    """
    # Wrapped in staticmethod so that accessing it through an instance yields the plain function.
    func_loader = staticmethod(dflt_func_loader)

    def _obj_of_data(self, data):
        """Build the object described by ``data``, which must contain a '$fak' specification."""
        if '$fak' in data:
            return fakit(data['$fak'], self.func_loader)

        msg = "Case not handled by MyStores"
        if isinstance(data, dict):
            detail = f"keys: {list(data.keys())}"
        else:
            detail = f"type: {type(data)}"
        raise ValueError(f"{msg}: {detail}")

    @property
    def configs(self):
        """The ``store`` attribute of this instance (presumably the underlying store of raw specs -- confirm)."""
        return self.store
189+
190+
191+
def without_json_ext(_id):
    """Return ``_id`` with its trailing '.json' removed; ``_id`` is asserted to end with '.json'."""
    ext = '.json'
    assert _id.endswith(ext), "Should end with .json"
    return _id[:len(_id) - len(ext)]
194+
195+
196+
def add_json_ext(k):
    """Return ``k`` with '.json' appended."""
    json_ext = '.json'
    return k + json_ext
198+
199+
200+
ExtLessJsonStore = wrap_kvs(LocalJsonStore, name='ExtLessJsonStore',
201+
key_of_id=without_json_ext, id_of_key=add_json_ext)
202+
203+
stores_json_path_format = os.path.join(user_configs_dirpath, 'stores', 'json', '{}.json')
204+
mystores = MyStores(ExtLessJsonStore(stores_json_path_format))
205+
56206
else:
57207
warn(f"The configs directory wasn't found (please make it): {user_configs_dirpath}")
58208
warn("Configs in a single json is being deprecated")

py2store/base.py

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
the storage methods themselves.
2525
"""
2626

27-
from collections.abc import Collection, Mapping, MutableMapping
27+
from collections.abc import Collection as CollectionABC
28+
from collections.abc import Mapping, MutableMapping
2829
from typing import Any, Iterable, Tuple
2930

3031
Key = Any
@@ -37,20 +38,19 @@
3738
ItemIter = Iterable[Item]
3839

3940

40-
class KvCollection(Collection):
41+
class Collection(CollectionABC):
4142

42-
def __contains__(self, k: Key) -> bool:
43+
def __contains__(self, x) -> bool:
4344
"""
4445
Check if collection of keys contains k.
45-
Note: This method actually fetches the contents for k, returning False if there's a key error trying to do so
46+
Note: This method loops through all contents of collection to see if query element exists.
4647
Therefore it may not be efficient, and in most cases, a method specific to the case should be used.
4748
:return: True if k is in the collection, and False if not
4849
"""
49-
try:
50-
self.__getitem__(k)
51-
return True
52-
except KeyError:
53-
return False
50+
for existing_x in self.__iter__():
51+
if existing_x == x:
52+
return True
53+
return False
5454

5555
def __len__(self) -> int:
5656
"""
@@ -59,21 +59,44 @@ def __len__(self) -> int:
5959
Therefore it is not efficient, and in most cases should be overridden with a more efficient version.
6060
:return: The number (int) of elements in the collection of keys.
6161
"""
62-
# TODO: some other means to more quickly count files?
6362
# Note: Found that sum(1 for _ in self.__iter__()) was slower for small, slightly faster for big inputs.
6463
count = 0
6564
for _ in self.__iter__():
6665
count += 1
6766
return count
6867

6968
def head(self):
70-
return next(iter(self.items()))
69+
if hasattr(self, 'items'):
70+
return next(iter(self.items()))
71+
else:
72+
return next(iter(self))
73+
74+
75+
KvCollection = Collection  # alias meant for back-compatibility. Would like to deprecate it.
7176

7277

73-
class KvReader(KvCollection, Mapping):
78+
# def getitem_based_contains(self, x) -> bool:
79+
# """
80+
# Check if collection of keys contains k.
81+
# Note: This method actually fetches the contents for k, returning False if there's a key error trying to do so
82+
# Therefore it may not be efficient, and in most cases, a method specific to the case should be used.
83+
# :return: True if k is in the collection, and False if not
84+
# """
85+
#
86+
# try:
87+
# self.__getitem__(k)
88+
# return True
89+
# except KeyError:
90+
# return False
91+
92+
93+
class KvReader(Collection, Mapping):
    """Acts as a Mapping abc, but with default __len__ (implemented by counting keys)
    and head method to get the first (k, v) item of the store"""

    def head(self):
        """Return the first ``(k, v)`` item of ``self.items()``, or ``None`` if there are no items."""
        # `return`, not `yield`: with `yield` this method would be a generator function, so calling it
        # would hand back a generator over ALL items rather than the first item.
        for k, v in self.items():
            return k, v
77100

78101

79102
Reader = KvReader # alias
@@ -117,11 +140,12 @@ class Store(Persister):
117140
"""
118141
By store we mean key-value store. This could be files in a filesystem, objects in s3, or a database. Where and
119142
how the content is stored should be specified, but StoreInterface offers a dict-like interface to this.
143+
::
144+
__getitem__ calls: _id_of_key _obj_of_data
145+
__setitem__ calls: _id_of_key _data_of_obj
146+
__delitem__ calls: _id_of_key
147+
__iter__ calls: _key_of_id
120148
121-
__getitem__ calls: _id_of_key _obj_of_data
122-
__setitem__ calls: _id_of_key _data_of_obj
123-
__delitem__ calls: _id_of_key
124-
__iter__ calls: _key_of_id
125149
126150
>>> # Default store: no key or value conversion ################################################
127151
>>> s = Store()
@@ -226,25 +250,56 @@ def __getitem__(self, k: Key) -> Val:
226250
return self._obj_of_data(self.store.__getitem__(self._id_of_key(k)))
227251

228252
def get(self, k: Key, default=None) -> Val:
    """Return the (outgoing) value for key ``k``, or ``default`` if the key is absent.

    If the wrapped ``self.store`` has a ``get`` method, it is used (with the key converted by
    ``_id_of_key``) and the data found is converted with ``_obj_of_data``. Otherwise membership is
    tested with ``k in self`` and the value is read through ``self[k]``.

    Args:
        k: The key, as seen from the outside of the store.
        default: What to return when the key is not found.

    Returns:
        The value for ``k`` or ``default``.
    """
    if hasattr(self.store, 'get'):  # if store has a get method, use it
        data = self.store.get(self._id_of_key(k), no_such_item)
        if data is not no_such_item:
            return self._obj_of_data(data)
        return default

    # The inner store has no `get`: fall back on our own __contains__/__getitem__.
    # self[k] already applies _obj_of_data, so it must NOT be applied a second time here.
    if k in self:
        return self[k]
    return default
234264

235265
# Explore ####################################################################
236266
def __iter__(self) -> KeyIter:
237267
return map(self._key_of_id, self.store.__iter__())
238268

269+
# def items(self) -> ItemIter:
270+
# if hasattr(self.store, 'items'):
271+
# yield from ((self._key_of_id(k), self._obj_of_data(v)) for k, v in self.store.items())
272+
# else:
273+
# yield from ((self._key_of_id(k), self._obj_of_data(self.store[k])) for k in self.store.__iter__())
274+
239275
def __len__(self) -> int:
240276
return self.store.__len__()
241277

242278
def __contains__(self, k) -> bool:
243279
return self.store.__contains__(self._id_of_key(k))
244280

245281
def head(self) -> Item:
    """Return the first ``(key, value)`` item of the store.

    The first key produced by iterating ``self`` is used, and its value is read with ``self[k]``.
    If anything goes wrong doing so, a warning is issued and the first item of the wrapped
    ``self.store`` is returned instead, converted with ``_key_of_id`` and ``_obj_of_data``.
    If there is nothing to iterate over, ``None`` is returned.
    """
    # Bound up front: the except block formats `k` in its message, and iteration can fail before
    # any key has been assigned (which would otherwise raise UnboundLocalError inside the handler).
    k = None
    try:
        for k in self:
            return k, self[k]
    except Exception as e:
        # Deliberately broad: any failure of the outer layer triggers the inner-layer fallback.
        from warnings import warn
        msg = f"Couldn't get data for the key {k}. This could be be...\n"
        msg += "... because it's not a store (just a collection, that doesn't have a __getitem__)\n"
        msg += "... because there's a layer transforming outcoming keys that are not the ones the store actually " \
               "uses? If you didn't wrap the store with the inverse ingoing keys transformation, " \
               "that would happen.\n"
        msg += "I'll ask the inner-layer what it's head is, but IT MAY NOT REFLECT the reality of your store " \
               "if you have some filtering, caching etc."
        msg += f"The error messages was: \n{e}"
        warn(msg)

        for _id in self.store:
            return self._key_of_id(_id), self._obj_of_data(self.store[_id])
300+
# NOTE: Old version didn't work when key mapping was asymmetrical
301+
# for k, v in self.items():
302+
# return k, v
248303

249304
# Write ####################################################################
250305
def __setitem__(self, k: Key, v: Val):
@@ -277,6 +332,7 @@ def __repr__(self):
277332

278333
def has_kv_store_interface(o):
279334
"""Check if object has the KvStore interface (that is, has the kv wrapper methods
335+
280336
Args:
281337
o: object (class or instance)
282338

0 commit comments

Comments
 (0)