Skip to content

Commit 4e1f823

Browse files
committed
FEAT: Implement version tracking and rollback functionality in GuepardDataFrame
1 parent c7a0893 commit 4e1f823

File tree

1 file changed

+28
-17
lines changed

1 file changed

+28
-17
lines changed

guepard_pandas/guepard_dataframe.py

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,47 @@
11
import pandas as pd
2-
import requests
2+
import os
3+
import pickle
4+
from datetime import datetime
35

46
class GuepardDataFrame(pd.DataFrame):
    """A pandas DataFrame that can snapshot itself to local pickle files.

    Each call to :meth:`commit` writes the current frame to
    ``<version_dir>/<version_id>.pkl``; :meth:`rollback` reloads one of those
    snapshots into this instance, and :meth:`list_versions` enumerates them.

    Args:
        *args: Forwarded unchanged to ``pandas.DataFrame``.
        version_dir: Keyword-only. Directory that holds the snapshot files.
            Defaults to ``'./versions'``. It is created if it does not exist.
        **kwargs: Remaining keyword arguments, forwarded to
            ``pandas.DataFrame``.
    """

    # Declares ``version_dir`` as instance metadata so pandas stores it as a
    # plain attribute (never as a column) and includes it in pickled state.
    _metadata = ["version_dir"]

    def __init__(self, *args, **kwargs):
        # ``version_dir`` must be removed from kwargs *before* delegating:
        # pandas.DataFrame.__init__ rejects unknown keyword arguments.
        version_dir = kwargs.pop('version_dir', './versions')
        super().__init__(*args, **kwargs)
        self.version_dir = version_dir
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(self.version_dir, exist_ok=True)

    def commit(self, message: str = "") -> str:
        """Write the current frame to a new snapshot file.

        Args:
            message: Accepted for API compatibility. It is currently not
                persisted anywhere.

        Returns:
            The version ID of the snapshot that was written. It is the
            timestamp from :meth:`_generate_version_id`; if a snapshot with
            that ID already exists (two commits within the same second), a
            zero-padded ``_NNN`` suffix is appended instead of overwriting it.
        """
        os.makedirs(self.version_dir, exist_ok=True)
        base_id = self._generate_version_id()
        version_id = base_id
        attempt = 0
        while True:
            version_path = os.path.join(self.version_dir, f"{version_id}.pkl")
            try:
                # 'xb' = exclusive creation: fail rather than clobber an
                # existing snapshot that happens to share the timestamp.
                with open(version_path, 'xb') as f:
                    pickle.dump(self, f)
            except FileExistsError:
                attempt += 1
                version_id = f"{base_id}_{attempt:03d}"
            else:
                return version_id

    def list_versions(self) -> list:
        """Return the IDs of all snapshots in ``version_dir``, sorted ascending.

        Only files ending in ``.pkl`` are considered; the ID is the file name
        without that suffix.
        """
        suffix = ".pkl"
        return sorted(
            filename[:-len(suffix)]
            for filename in os.listdir(self.version_dir)
            if filename.endswith(suffix)
        )

    def rollback(self, version_id: str) -> None:
        """Replace this frame's contents with the snapshot ``version_id``.

        The instance keeps its current ``version_dir``.

        Args:
            version_id: An ID previously returned by :meth:`commit`.

        Raises:
            ValueError: If no snapshot file exists for ``version_id``.
        """
        version_path = os.path.join(self.version_dir, f"{version_id}.pkl")
        try:
            with open(version_path, 'rb') as f:
                # NOTE(security): unpickling can execute arbitrary code. Only
                # roll back from a version_dir whose contents are trusted.
                df = pickle.load(f)
        except FileNotFoundError:
            raise ValueError("Version ID not found") from None
        # Re-initialise in place, passing version_dir explicitly so it is not
        # reset to the './versions' default.
        self.__init__(df, version_dir=self.version_dir)

    def next_version(self) -> str:
        """Commit the current state with an empty message; return its ID."""
        return self.commit()

    def _generate_version_id(self) -> str:
        """Return the current local time formatted as ``YYYYMMDD_HHMMSS``."""
        return datetime.now().strftime("%Y%m%d_%H%M%S")
41+
42+
# Example usage:
43+
# df = GuepardDataFrame(pd.read_csv("data.csv"), version_dir="path/to/versions")
44+
# df["new_col"] = df["existing_col"] * 2
45+
# df.commit("Added new column")
46+
# print(df.list_versions())
47+
# df.rollback(version_id="20240326_123456")

0 commit comments

Comments
 (0)