diff --git a/.gitignore b/.gitignore
index b512c09..2c9d39e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,9 @@
-node_modules
\ No newline at end of file
+node_modules
+# Ignore virtual environments
+myenv/
+venv/
+.env
+.venv
+ENV/
+env.bak/
+venv.bak/
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..59d9f98
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1 @@
+# This file marks the directory as a Python package so that its modules can be imported (needed to import the extract function in tests/test_extract.py).
\ No newline at end of file
diff --git a/app/__pycache__/__init__.cpython-310.pyc b/app/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..c98e2b2
Binary files /dev/null and b/app/__pycache__/__init__.cpython-310.pyc differ
diff --git a/app/etl/__init__.py b/app/etl/__init__.py
new file mode 100644
index 0000000..59d9f98
--- /dev/null
+++ b/app/etl/__init__.py
@@ -0,0 +1 @@
+# This file marks the directory as a Python package so that its modules can be imported (needed to import the extract function in tests/test_extract.py).
\ No newline at end of file
diff --git a/app/etl/__pycache__/__init__.cpython-310.pyc b/app/etl/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..7dbe7b9
Binary files /dev/null and b/app/etl/__pycache__/__init__.cpython-310.pyc differ
diff --git a/app/etl/__pycache__/extract.cpython-310.pyc b/app/etl/__pycache__/extract.cpython-310.pyc
new file mode 100644
index 0000000..793239e
Binary files /dev/null and b/app/etl/__pycache__/extract.cpython-310.pyc differ
diff --git a/app/etl/extract.py b/app/etl/extract.py
index 94714f2..bdf0f68 100644
--- a/app/etl/extract.py
+++ b/app/etl/extract.py
@@ -21,7 +21,7 @@ def extract(path: str = "xyz.csv") -> pd.DataFrame :
     if not os.path.exists(path):
         raise FileNotFoundError(f"❌ File not found: {path}")
     
-    if not path.lower().endswith('.csv'):  # TODO (Find & Fix)
+    if not str(path).lower().endswith('.csv'):
         raise ValueError(f"❌ File must be a CSV: {path}")
     
     try:
@@ -32,12 +32,12 @@ def extract(path: str = "xyz.csv") -> pd.DataFrame :
         for encoding in encodings:
             try:
                 # TODO (Find & Fix)
-                pass
+                pass 
             except UnicodeDecodeError:
                 print(f"Failed to read with encoding '{encoding}'")  # Log the encoding that failed
         
         if df is None:
-            raise ValueError(f" Could not read CSV with tried encodings: {encodings}")
+            raise ValueError(f"Could not read CSV with tried encodings: {encodings}")
         
         # Validate data
         if df.empty:
@@ -50,5 +50,3 @@ def extract(path: str = "xyz.csv") -> pd.DataFrame :
         raise ValueError("❌ File contains no data")
     except pd.errors.ParserError as e:
         raise ValueError(f"❌ Error parsing CSV: {e}")
-    except Exception as e:
-        raise ValueError(f"❌ Unexpected error reading file: {e}")
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..59d9f98
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# This file marks the tests directory as a Python package so that tests/test_extract.py can import the extract function from the app package.
\ No newline at end of file
diff --git a/tests/__pycache__/__init__.cpython-310.pyc b/tests/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..cf1af23
Binary files /dev/null and b/tests/__pycache__/__init__.cpython-310.pyc differ
diff --git a/tests/__pycache__/test_extract.cpython-310-pytest-8.4.2.pyc b/tests/__pycache__/test_extract.cpython-310-pytest-8.4.2.pyc
new file mode 100644
index 0000000..dbb29cd
Binary files /dev/null and b/tests/__pycache__/test_extract.cpython-310-pytest-8.4.2.pyc differ
diff --git a/tests/data/empty.csv b/tests/data/empty.csv
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/invalid_path.txt b/tests/data/invalid_path.txt
new file mode 100644
index 0000000..20461b3
--- /dev/null
+++ b/tests/data/invalid_path.txt
@@ -0,0 +1 @@
+this is a dummy file for unit tests
\ No newline at end of file
diff --git a/tests/data/valid.csv b/tests/data/valid.csv
new file mode 100644
index 0000000..5c5fd75
--- /dev/null
+++ b/tests/data/valid.csv
@@ -0,0 +1,3 @@
+name,age,city
+Alice,25,Paris
+Bob,30,London
\ No newline at end of file
diff --git a/tests/test_extract.py b/tests/test_extract.py
new file mode 100644
index 0000000..85c1f52
--- /dev/null
+++ b/tests/test_extract.py
@@ -0,0 +1,40 @@
+from pathlib import Path
+import pytest
+import re
+import pandas as pd
+from app.etl.extract import extract
+
+def test_empty_file_path():
+    path = ""
+    with pytest.raises(FileNotFoundError, match=f"❌ File not found: {path}"):
+        extract(path)
+
+def test_invalid_file_extension():
+    path = "./tests/data/invalid_path.txt"
+    with pytest.raises(ValueError, match="File must be a CSV"):
+        extract(path)
+
+def test_invalid_encoding(tmp_path):
+    bad_file = tmp_path / "invalid_encoding.csv"
+    bad_file.write_bytes(b"\xff\xfe\x00\x00\xff")
+    encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
+    with pytest.raises(ValueError, match=re.escape(f"Could not read CSV with tried encodings: {encodings}")):
+        extract(bad_file)
+
+def test_empty_file():
+    path = "./tests/data/empty.csv"
+    with pytest.raises(ValueError, match="File contains no data"):
+        extract(path)
+
+def test_valid_file(tmp_path):
+    csv_file = tmp_path / "valid.csv"
+    csv_data = "name,age,city\nAlice,25,Paris\nBob,30,London\n"
+    csv_file.write_text(csv_data, encoding="utf-8")
+    df = extract(csv_file)
+    expected_df = pd.DataFrame({
+        "name": ["Alice", "Bob"],
+        "age": [25, 30],
+        "city": ["Paris", "London"]
+    })
+    assert isinstance(df, pd.DataFrame)
+    pd.testing.assert_frame_equal(df.reset_index(drop=True), expected_df)