Skip to content

Commit 4e6b097

Browse files
authored
Merge pull request #1 from ethho/release/v0.3.0
Version 0.3.0
2 parents bef633b + e644516 commit 4e6b097

File tree

5 files changed

+42
-49
lines changed

5 files changed

+42
-49
lines changed

poetry.lock

Lines changed: 14 additions & 32 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "memoize"
3-
version = "0.2.1"
3+
version = "0.3.0"
44
description = "Python3 memoization decorator"
55
authors = ["Ethan Ho <ethan.n.ho@gmail.com>"]
66
license = "MIT"

src/memoize/dataframe.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,34 @@
1414

1515
from .main import _clean_func_name, _get_hist_fps, _make_key
1616

17-
18-
def _read_csv(fp: str) -> pd.DataFrame:
17+
def _read(ext: str, fp: str) -> pd.DataFrame:
    """Read a cached DataFrame from the file at `fp`.

    Args:
        ext: Format of the cache file; either 'csv' or 'parquet'.
        fp: Path of the file to read.

    Returns:
        The DataFrame loaded by `pd.read_csv` or `pd.read_parquet`.

    Raises:
        ValueError: If `ext` is not one of the supported formats.
    """
    if ext == 'csv':
        return pd.read_csv(fp)
    if ext == 'parquet':
        return pd.read_parquet(fp)
    # ValueError is a subclass of Exception, so callers that catch
    # Exception keep working.
    raise ValueError(f"Unsupported file extension {ext}")
2125

2226

23-
def _write_csv(fp: str, df: pd.DataFrame):
24-
"""Write DataFrame to CSV file at `fp` from DataFrame `df`."""
25-
write_index = bool(df.index.name)
26-
return df.to_csv(fp, index=write_index)
27+
def _write(ext: str, fp: str, df: pd.DataFrame):
    """Write DataFrame `df` to the file at `fp` in the format named by `ext`.

    Args:
        ext: Format of the cache file; either 'csv' or 'parquet'.
        fp: Destination path.
        df: DataFrame to store. NOTE: on the 'parquet' path, column labels
            whose dtype is not `object` are cast to `str` in place, so the
            caller's DataFrame is modified.

    Returns:
        Whatever `DataFrame.to_csv` / `DataFrame.to_parquet` returns for a
        path argument.

    Raises:
        ValueError: If `ext` is not one of the supported formats.
    """
    if ext == 'csv':
        # The index is written only when it has a (truthy) name.
        write_index = bool(df.index.name)
        return df.to_csv(fp, index=write_index)
    if ext == 'parquet':
        # Presumably needed because the parquet writer requires string
        # column names -- confirm against the pandas `to_parquet` docs.
        if not pd.api.types.is_object_dtype(df.columns.dtype):
            print("WARNING: Converting column names to string dtype")
            df.columns = df.columns.astype(str)
        return df.to_parquet(fp)
    # ValueError is a subclass of Exception, so callers that catch
    # Exception keep working.
    raise ValueError(f"Unsupported file extension {ext}")
2738

2839

2940
def memoize_df(
3041
stub: Optional[str] = None,
3142
cache_dir: Optional[str] = '/tmp/memoize',
3243
ext: str = 'csv',
3344
log_func: Callable = print,
34-
ignore_invalid: bool = True,
3545
cache_lifetime_days: int = 0
3646
) -> Callable:
3747
"""
@@ -61,7 +71,7 @@ def memoize_dec(*args, **kwargs):
6171
if not kwargs.get('_memoize_force_refresh'):
6272
for hist_fp in hist_fps:
6373
log_func(f"Using cached call from {hist_fp}")
64-
return _read_csv(hist_fp)
74+
return _read(ext, hist_fp)
6575

6676
# Else run the function and store cached result
6777
result = func(*args, **kwargs)
@@ -71,7 +81,7 @@ def memoize_dec(*args, **kwargs):
7181
f"Failed to write return value of function '{funcname}' to CSV file. "
7282
f"Expected a pandas.DataFrame, received {type(result)}."
7383
)
74-
_write_csv(fp, result)
84+
_write(ext, fp, result)
7585
return result
7686
return memoize_dec
7787
return add_memoize_dec

src/memoize/main.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def _make_key(func_name: str, args: List, kwargs: Dict, maxlen: int = None) -> s
1616
d['_func_name'] = func_name
1717
d['_args'] = args
1818
hl = hashlib.new('sha256')
19-
hl.update(json.dumps(d, sort_keys=True).encode())
19+
hl.update(json.dumps(d, sort_keys=True).encode())
2020
as_str = hl.hexdigest()
2121
if maxlen:
2222
as_str = as_str[:maxlen]
@@ -48,7 +48,7 @@ def _get_hist_fps(query: str, cache_lifetime_days: int = None) -> List[str]:
4848
except Exception as err:
4949
raise
5050
dt_grps.append(item)
51-
51+
5252
fps = [
5353
file['fp'] for file in
5454
# Sort filepaths starting with most recent
@@ -92,7 +92,7 @@ def memoize(stub: Optional[str] = None,
9292
raise Exception(f'{cache_dir=} exists but is not a directory')
9393
else:
9494
os.makedirs(cache_dir)
95-
stub = stub if stub else date.today().strftime('%Y%m%d')
95+
stub = stub if stub else date.today().strftime('%Y%m%d')
9696

9797
def add_memoize_dec(func):
9898
funcname = _clean_func_name(func.__name__)
@@ -118,7 +118,7 @@ def memoize_dec(*args, **kwargs):
118118
text = json.dumps(cache)
119119
f.write(text)
120120
return cache[key]
121-
121+
122122
# Else run the function and store cached result
123123
result = func(*args, **kwargs)
124124
cache[key] = result

tests/test_dataframe.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ def example_func(foo: int):
1010
return df
1111

1212

13-
def test_memoize():
14-
wrapped = memoize_df(cache_lifetime_days=None)(example_func)
13+
@pytest.mark.parametrize('ext', ['csv', 'parquet'])
14+
def test_memoize(ext):
15+
wrapped = memoize_df(cache_lifetime_days=None, ext=ext)(example_func)
1516
print(wrapped(2))
1617
print(wrapped(3))
1718
print(wrapped(5))

0 commit comments

Comments
 (0)