diff --git a/.gitignore b/.gitignore index 2c9d39e..c4f4de9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,12 @@ venv/ .venv ENV/ env.bak/ -venv.bak/ \ No newline at end of file +venv.bak/ + +etl_data.db + +# Ignore Python cache files +__pycache__/ +*.pyc +*.pyo +*.pyd diff --git a/app/__pycache__/__init__.cpython-310.pyc b/app/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index c98e2b2..0000000 Binary files a/app/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/app/etl/__pycache__/__init__.cpython-310.pyc b/app/etl/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 7dbe7b9..0000000 Binary files a/app/etl/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/app/etl/__pycache__/extract.cpython-310.pyc b/app/etl/__pycache__/extract.cpython-310.pyc deleted file mode 100644 index 793239e..0000000 Binary files a/app/etl/__pycache__/extract.cpython-310.pyc and /dev/null differ diff --git a/app/etl/__pycache__/extract.cpython-313.pyc b/app/etl/__pycache__/extract.cpython-313.pyc deleted file mode 100644 index 10b5268..0000000 Binary files a/app/etl/__pycache__/extract.cpython-313.pyc and /dev/null differ diff --git a/app/etl/__pycache__/load.cpython-313.pyc b/app/etl/__pycache__/load.cpython-313.pyc deleted file mode 100644 index 2790389..0000000 Binary files a/app/etl/__pycache__/load.cpython-313.pyc and /dev/null differ diff --git a/app/etl/__pycache__/transform.cpython-313.pyc b/app/etl/__pycache__/transform.cpython-313.pyc deleted file mode 100644 index 4347539..0000000 Binary files a/app/etl/__pycache__/transform.cpython-313.pyc and /dev/null differ diff --git a/app/etl/load.py b/app/etl/load.py index 66a1284..4ae4589 100644 --- a/app/etl/load.py +++ b/app/etl/load.py @@ -21,8 +21,7 @@ def load(df: pd.DataFrame, db_path: str = "etl_data.db", table_name: str = "proc # Ensure directory exists db_dir = os.path.dirname(db_path) if db_dir and not os.path.exists(db_dir): - os.makedirs(db_dir) - + 
os.makedirs(db_dir) conn = None try: # Connect to database @@ -30,11 +29,9 @@ def load(df: pd.DataFrame, db_path: str = "etl_data.db", table_name: str = "proc cursor = conn.cursor() # TODO (Find & Fix): Table creation and schema logic missing - - # Idempotency check (should avoid duplicate inserts) cursor.execute(f""" CREATE TABLE IF NOT EXISTS {table_name} ( - employee_id INTEGER PRIMARY KEY, + employee_id TEXT PRIMARY KEY, name TEXT, email TEXT, age INTEGER, @@ -50,15 +47,11 @@ def load(df: pd.DataFrame, db_path: str = "etl_data.db", table_name: str = "proc ) """) - data_to_insert = [tuple(row) for row in df.itertuples(index=False, name=None)] placeholders = ", ".join(["?"] * len(df.columns)) column_names = ", ".join(df.columns) - sql_query = f"INSERT OR IGNORE INTO {table_name} ({column_names}) VALUES ({placeholders})" - cursor.executemany(sql_query, data_to_insert) + sql_query = f"INSERT OR REPLACE INTO {table_name} ({column_names}) VALUES ({placeholders})" + cursor.executemany(sql_query, df.itertuples(index=False, name=None)) conn.commit() - # TODO (Find & Fix): Bulk insert without checking for duplicates - - except sqlite3.Error as e: if conn: conn.rollback() diff --git a/app/main.py b/app/main.py index e61920b..e881ef4 100644 --- a/app/main.py +++ b/app/main.py @@ -1,8 +1,13 @@ +import os + from app.etl.extract import extract from app.etl.transform import transform from app.etl.load import load -def run_pipeline(csv_path: str = "data.csv", db_path: str = "etl_data.db"): +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +data_path = os.path.join(BASE_DIR, "data.csv") + +def run_pipeline(csv_path: str = data_path, db_path: str = "etl_data.db"): """ Run the complete ETL pipeline.