Skip to content

Commit 2cb40d1

Browse files
committed
FEAT: Enhance GuepardDataFrame with version management and rollback capabilities
1 parent 4e1f823 commit 2cb40d1

File tree

3 files changed

+372
-19
lines changed

3 files changed

+372
-19
lines changed

guepard_pandas/data.csv

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
id,nom
2+
1,nour
3+
2,kobbi
Lines changed: 332 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,332 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 23,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import pandas as pd\n",
10+
"import os\n",
11+
"import pickle\n",
12+
"from datetime import datetime\n",
13+
"\n",
14+
"class GuepardDataFrame(pd.DataFrame):\n",
15+
" def __init__(self, *args, **kwargs):\n",
16+
" version_dir = kwargs.pop('version_dir', './versions')\n",
17+
" super().__init__(*args, **kwargs)\n",
18+
" self.current_version_path = os.path.join(version_dir, 'current_version.pkl')\n",
19+
" self.version_dir = version_dir\n",
20+
" self.versions_meta_file = os.path.join(version_dir, 'versions_meta.pkl')\n",
21+
" if not os.path.exists(self.version_dir):\n",
22+
" os.makedirs(self.version_dir)\n",
23+
" if 'data' in kwargs:\n",
24+
" self._load_data(kwargs['data'])\n",
25+
" else:\n",
26+
" self._load_current_version()\n",
27+
" \n",
28+
" def _load_data(self, data):\n",
29+
" super().__init__(data)\n",
30+
" \n",
31+
" def _load_current_version(self):\n",
32+
" if os.path.exists(self.current_version_path):\n",
33+
" with open(self.current_version_path, 'rb') as f:\n",
34+
" df = pickle.load(f)\n",
35+
" super().__init__(df)\n",
36+
" \n",
37+
" def commit(self, message=\"\"):\n",
38+
" version_id = self._generate_version_id()\n",
39+
" self._save_current_version()\n",
40+
" self._store_version_meta(version_id, message)\n",
41+
" return version_id\n",
42+
" \n",
43+
" def _save_current_version(self):\n",
44+
" with open(self.current_version_path, 'wb') as f:\n",
45+
" pickle.dump(self, f)\n",
46+
" \n",
47+
" def _store_version_meta(self, version_id, message):\n",
48+
" versions_meta = self._load_versions_meta()\n",
49+
" versions_meta.append({'version_id': version_id, 'message': message, 'timestamp': datetime.now()})\n",
50+
" with open(self.versions_meta_file, 'wb') as f:\n",
51+
" pickle.dump(versions_meta, f)\n",
52+
" \n",
53+
" def _load_versions_meta(self):\n",
54+
" if os.path.exists(self.versions_meta_file):\n",
55+
" with open(self.versions_meta_file, 'rb') as f:\n",
56+
" return pickle.load(f)\n",
57+
" return []\n",
58+
" \n",
59+
" def list_versions(self):\n",
60+
" versions_meta = self._load_versions_meta()\n",
61+
" return [{'version_id': meta['version_id'], 'message': meta['message'], 'timestamp': meta['timestamp']} for meta in versions_meta]\n",
62+
" \n",
63+
" def rollback(self, version_id):\n",
64+
" version_path = os.path.join(self.version_dir, f\"{version_id}.pkl\")\n",
65+
" if not os.path.exists(version_path):\n",
66+
" raise ValueError(\"Version ID not found\")\n",
67+
" with open(version_path, 'rb') as f:\n",
68+
" df = pickle.load(f)\n",
69+
" self._load_data(df)\n",
70+
" self._save_current_version()\n",
71+
" \n",
72+
" def save_version(self, version_id):\n",
73+
" version_path = os.path.join(self.version_dir, f\"{version_id}.pkl\")\n",
74+
" with open(version_path, 'wb') as f:\n",
75+
" pickle.dump(self, f)\n",
76+
" \n",
77+
" def _generate_version_id(self):\n",
78+
" return datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
79+
"\n",
80+
" def get_current_version(self):\n",
81+
" if os.path.exists(self.current_version_path):\n",
82+
" with open(self.current_version_path, 'rb') as f:\n",
83+
" df = pickle.load(f)\n",
84+
" return df\n",
85+
" else:\n",
86+
" return None"
87+
]
88+
},
89+
{
90+
"cell_type": "code",
91+
"execution_count": null,
92+
"metadata": {},
93+
"outputs": [
94+
{
95+
"name": "stdout",
96+
"output_type": "stream",
97+
"text": [
98+
"Initial DataFrame:\n",
99+
" id nom\n",
100+
"0 1 nour\n",
101+
"1 2 kobbi\n"
102+
]
103+
}
104+
],
105+
"source": [
106+
"\n",
107+
"df = GuepardDataFrame(pd.read_csv(\"data.csv\"), version_dir=\"./versions\")\n",
108+
"\n",
109+
"print(\"Initial DataFrame:\")\n",
110+
"print(df)\n",
111+
"\n",
112+
"initial_version_id = df.commit(\"Initial version\")\n",
113+
"df.save_version(initial_version_id)\n",
114+
"\n",
115+
"new_rows = pd.DataFrame({\n",
116+
" 'id': [3, 4],\n",
117+
" 'nom': ['alice', 'bob']\n",
118+
"})\n",
119+
"\n"
120+
]
121+
},
122+
{
123+
"cell_type": "code",
124+
"execution_count": null,
125+
"metadata": {},
126+
"outputs": [
127+
{
128+
"name": "stdout",
129+
"output_type": "stream",
130+
"text": [
131+
" id nom\n",
132+
"0 1 nour\n",
133+
"1 2 kobbi\n",
134+
"2 3 alice\n",
135+
"3 4 bob\n",
136+
" id nom\n",
137+
"0 1 nour\n",
138+
"1 2 kobbi\n",
139+
"2 3 alice\n",
140+
"3 4 bob\n"
141+
]
142+
}
143+
],
144+
"source": [
145+
"df=pd.concat([df, new_rows], ignore_index=True)\n",
146+
"df = GuepardDataFrame(data=df, version_dir=\"./versions\")\n",
147+
"print(df)"
148+
]
149+
},
150+
{
151+
"cell_type": "code",
152+
"execution_count": 26,
153+
"metadata": {},
154+
"outputs": [
155+
{
156+
"data": {
157+
"text/html": [
158+
"<div>\n",
159+
"<style scoped>\n",
160+
" .dataframe tbody tr th:only-of-type {\n",
161+
" vertical-align: middle;\n",
162+
" }\n",
163+
"\n",
164+
" .dataframe tbody tr th {\n",
165+
" vertical-align: top;\n",
166+
" }\n",
167+
"\n",
168+
" .dataframe thead th {\n",
169+
" text-align: right;\n",
170+
" }\n",
171+
"</style>\n",
172+
"<table border=\"1\" class=\"dataframe\">\n",
173+
" <thead>\n",
174+
" <tr style=\"text-align: right;\">\n",
175+
" <th></th>\n",
176+
" <th>id</th>\n",
177+
" <th>nom</th>\n",
178+
" </tr>\n",
179+
" </thead>\n",
180+
" <tbody>\n",
181+
" <tr>\n",
182+
" <th>0</th>\n",
183+
" <td>1</td>\n",
184+
" <td>nour</td>\n",
185+
" </tr>\n",
186+
" <tr>\n",
187+
" <th>1</th>\n",
188+
" <td>2</td>\n",
189+
" <td>kobbi</td>\n",
190+
" </tr>\n",
191+
" <tr>\n",
192+
" <th>2</th>\n",
193+
" <td>3</td>\n",
194+
" <td>alice</td>\n",
195+
" </tr>\n",
196+
" <tr>\n",
197+
" <th>3</th>\n",
198+
" <td>4</td>\n",
199+
" <td>bob</td>\n",
200+
" </tr>\n",
201+
" </tbody>\n",
202+
"</table>\n",
203+
"</div>"
204+
],
205+
"text/plain": [
206+
" id nom\n",
207+
"0 1 nour\n",
208+
"1 2 kobbi\n",
209+
"2 3 alice\n",
210+
"3 4 bob"
211+
]
212+
},
213+
"execution_count": 26,
214+
"metadata": {},
215+
"output_type": "execute_result"
216+
}
217+
],
218+
"source": [
219+
"df"
220+
]
221+
},
222+
{
223+
"cell_type": "code",
224+
"execution_count": 27,
225+
"metadata": {},
226+
"outputs": [],
227+
"source": [
228+
"# Commit the changes\n",
229+
"new_version_id = df.commit(\"Added specific rows\")\n",
230+
"df.save_version(new_version_id)"
231+
]
232+
},
233+
{
234+
"cell_type": "code",
235+
"execution_count": 28,
236+
"metadata": {},
237+
"outputs": [
238+
{
239+
"name": "stdout",
240+
"output_type": "stream",
241+
"text": [
242+
"\n",
243+
"Available versions:\n",
244+
"{'version_id': '20250328_033906', 'message': 'Initial version', 'timestamp': datetime.datetime(2025, 3, 28, 3, 39, 6, 470004)}\n",
245+
"{'version_id': '20250328_033915', 'message': 'Added specific rows', 'timestamp': datetime.datetime(2025, 3, 28, 3, 39, 15, 927749)}\n"
246+
]
247+
}
248+
],
249+
"source": [
250+
"# List versions\n",
251+
"print(\"\\nAvailable versions:\")\n",
252+
"for version in df.list_versions():\n",
253+
" print(version)"
254+
]
255+
},
256+
{
257+
"cell_type": "code",
258+
"execution_count": 33,
259+
"metadata": {},
260+
"outputs": [
261+
{
262+
"name": "stdout",
263+
"output_type": "stream",
264+
"text": [
265+
"\n",
266+
"DataFrame after rollback to version 20250328_033906:\n",
267+
" id nom\n",
268+
"0 1 nour\n",
269+
"1 2 kobbi\n",
270+
"2 3 alice\n",
271+
"3 4 bob\n"
272+
]
273+
}
274+
],
275+
"source": [
276+
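"# Rollback to a saved version. NOTE: the id used below ('20250328_033915') is the 'Added specific rows' commit, not the initial version, so the rows stay unchanged.\n",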
277+
"df.rollback(version_id='20250328_033915')\n",
278+
"print(f\"\\nDataFrame after rollback to version {initial_version_id}:\")\n",
279+
"print(df)"
280+
]
281+
},
282+
{
283+
"cell_type": "code",
284+
"execution_count": 34,
285+
"metadata": {},
286+
"outputs": [
287+
{
288+
"name": "stdout",
289+
"output_type": "stream",
290+
"text": [
291+
"\n",
292+
"Current Version DataFrame:\n",
293+
"None\n"
294+
]
295+
}
296+
],
297+
"source": [
298+
"current_version = df.get_current_version()\n",
299+
"print(\"\\nCurrent Version DataFrame:\")\n",
300+
"print(current_version)\n"
301+
]
302+
},
303+
{
304+
"cell_type": "code",
305+
"execution_count": null,
306+
"metadata": {},
307+
"outputs": [],
308+
"source": []
309+
}
310+
],
311+
"metadata": {
312+
"kernelspec": {
313+
"display_name": "Python 3",
314+
"language": "python",
315+
"name": "python3"
316+
},
317+
"language_info": {
318+
"codemirror_mode": {
319+
"name": "ipython",
320+
"version": 3
321+
},
322+
"file_extension": ".py",
323+
"mimetype": "text/x-python",
324+
"name": "python",
325+
"nbconvert_exporter": "python",
326+
"pygments_lexer": "ipython3",
327+
"version": "3.13.2"
328+
}
329+
},
330+
"nbformat": 4,
331+
"nbformat_minor": 2
332+
}

0 commit comments

Comments
 (0)