FEAT: Enhance GuepardDataFrame with version management and rollback capabilities

kobbinour13 · kobbinour13 · commit 2cb40d1b3972 · 2025-03-28T03:43:02.000+01:00
diff --git a/guepard_pandas/data.csv b/guepard_pandas/data.csv
@@ -0,0 +1,3 @@
+id,nom
+1,nour
+2,kobbi
diff --git a/guepard_pandas/guepard_dataframe.ipynb b/guepard_pandas/guepard_dataframe.ipynb
@@ -0,0 +1,332 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import os\n",
+    "import pickle\n",
+    "from datetime import datetime\n",
+    "\n",
+    "class GuepardDataFrame(pd.DataFrame):\n",
+    "    def __init__(self, *args, **kwargs):\n",
+    "        version_dir = kwargs.pop('version_dir', './versions')\n",
+    "        super().__init__(*args, **kwargs)\n",
+    "        self.current_version_path = os.path.join(version_dir, 'current_version.pkl')\n",
+    "        self.version_dir = version_dir\n",
+    "        self.versions_meta_file = os.path.join(version_dir, 'versions_meta.pkl')\n",
+    "        if not os.path.exists(self.version_dir):\n",
+    "            os.makedirs(self.version_dir)\n",
+    "        if 'data' in kwargs:\n",
+    "            self._load_data(kwargs['data'])\n",
+    "        else:\n",
+    "            self._load_current_version()\n",
+    "    \n",
+    "    def _load_data(self, data):\n",
+    "        super().__init__(data)\n",
+    "    \n",
+    "    def _load_current_version(self):\n",
+    "        if os.path.exists(self.current_version_path):\n",
+    "            with open(self.current_version_path, 'rb') as f:\n",
+    "                df = pickle.load(f)\n",
+    "            super().__init__(df)\n",
+    "    \n",
+    "    def commit(self, message=\"\"):\n",
+    "        version_id = self._generate_version_id()\n",
+    "        self._save_current_version()\n",
+    "        self._store_version_meta(version_id, message)\n",
+    "        return version_id\n",
+    "    \n",
+    "    def _save_current_version(self):\n",
+    "        with open(self.current_version_path, 'wb') as f:\n",
+    "            pickle.dump(self, f)\n",
+    "    \n",
+    "    def _store_version_meta(self, version_id, message):\n",
+    "        versions_meta = self._load_versions_meta()\n",
+    "        versions_meta.append({'version_id': version_id, 'message': message, 'timestamp': datetime.now()})\n",
+    "        with open(self.versions_meta_file, 'wb') as f:\n",
+    "            pickle.dump(versions_meta, f)\n",
+    "    \n",
+    "    def _load_versions_meta(self):\n",
+    "        if os.path.exists(self.versions_meta_file):\n",
+    "            with open(self.versions_meta_file, 'rb') as f:\n",
+    "                return pickle.load(f)\n",
+    "        return []\n",
+    "    \n",
+    "    def list_versions(self):\n",
+    "        versions_meta = self._load_versions_meta()\n",
+    "        return [{'version_id': meta['version_id'], 'message': meta['message'], 'timestamp': meta['timestamp']} for meta in versions_meta]\n",
+    "    \n",
+    "    def rollback(self, version_id):\n",
+    "        version_path = os.path.join(self.version_dir, f\"{version_id}.pkl\")\n",
+    "        if not os.path.exists(version_path):\n",
+    "            raise ValueError(\"Version ID not found\")\n",
+    "        with open(version_path, 'rb') as f:\n",
+    "            df = pickle.load(f)\n",
+    "        self._load_data(df)\n",
+    "        self._save_current_version()\n",
+    "    \n",
+    "    def save_version(self, version_id):\n",
+    "        version_path = os.path.join(self.version_dir, f\"{version_id}.pkl\")\n",
+    "        with open(version_path, 'wb') as f:\n",
+    "            pickle.dump(self, f)\n",
+    "    \n",
+    "    def _generate_version_id(self):\n",
+    "        return datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
+    "\n",
+    "    def get_current_version(self):\n",
+    "        if os.path.exists(self.current_version_path):\n",
+    "            with open(self.current_version_path, 'rb') as f:\n",
+    "                df = pickle.load(f)\n",
+    "            return df\n",
+    "        else:\n",
+    "            return None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Initial DataFrame:\n",
+      "   id    nom\n",
+      "0   1   nour\n",
+      "1   2  kobbi\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "df = GuepardDataFrame(pd.read_csv(\"data.csv\"), version_dir=\"./versions\")\n",
+    "\n",
+    "print(\"Initial DataFrame:\")\n",
+    "print(df)\n",
+    "\n",
+    "initial_version_id = df.commit(\"Initial version\")\n",
+    "df.save_version(initial_version_id)\n",
+    "\n",
+    "new_rows = pd.DataFrame({\n",
+    "    'id': [3, 4],\n",
+    "    'nom': ['alice', 'bob']\n",
+    "})\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   id    nom\n",
+      "0   1   nour\n",
+      "1   2  kobbi\n",
+      "2   3  alice\n",
+      "3   4    bob\n",
+      "   id    nom\n",
+      "0   1   nour\n",
+      "1   2  kobbi\n",
+      "2   3  alice\n",
+      "3   4    bob\n"
+     ]
+    }
+   ],
+   "source": [
+    "df=pd.concat([df, new_rows], ignore_index=True)\n",
+    "df = GuepardDataFrame(data=df, version_dir=\"./versions\")\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>nom</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>nour</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>kobbi</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>alice</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>bob</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   id    nom\n",
+       "0   1   nour\n",
+       "1   2  kobbi\n",
+       "2   3  alice\n",
+       "3   4    bob"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Commit the changes\n",
+    "new_version_id = df.commit(\"Added specific rows\")\n",
+    "df.save_version(new_version_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Available versions:\n",
+      "{'version_id': '20250328_033906', 'message': 'Initial version', 'timestamp': datetime.datetime(2025, 3, 28, 3, 39, 6, 470004)}\n",
+      "{'version_id': '20250328_033915', 'message': 'Added specific rows', 'timestamp': datetime.datetime(2025, 3, 28, 3, 39, 15, 927749)}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# List versions\n",
+    "print(\"\\nAvailable versions:\")\n",
+    "for version in df.list_versions():\n",
+    "    print(version)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "DataFrame after rollback to version 20250328_033906:\n",
+      "   id    nom\n",
+      "0   1   nour\n",
+      "1   2  kobbi\n",
+      "2   3  alice\n",
+      "3   4    bob\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Rollback to the initial version\n",
+    "df.rollback(version_id='20250328_033915')\n",
+    "print(f\"\\nDataFrame after rollback to version {initial_version_id}:\")\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Current Version DataFrame:\n",
+      "None\n"
+     ]
+    }
+   ],
+   "source": [
+    "current_version = df.get_current_version()\n",
+    "print(\"\\nCurrent Version DataFrame:\")\n",
+    "print(current_version)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/guepard_pandas/guepard_dataframe.py b/guepard_pandas/guepard_dataframe.py