Skip to content

Commit ef8bb7b

Browse files
Merge pull request #41 from pushpam345/feat/logging-new
feat: implemented logging
2 parents aa97624 + ccb4d02 commit ef8bb7b

File tree

4 files changed

+59
-43
lines changed

4 files changed

+59
-43
lines changed

app/etl/extract.py

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,79 @@
11
import pandas as pd
22
import os
3+
import logging as lg
34

4-
# Get the base directory (app/) relative to this file (app/etl/extract.py)
5-
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
6-
DEFAULT_DATA_PATH = os.path.join(BASE_DIR, "data.csv")
5+
# TODO (Find & Fix)
76

8-
def extract(path: str = DEFAULT_DATA_PATH) -> pd.DataFrame :
7+
8+
logger = lg.getLogger(__name__)
9+
logger.setLevel(lg.DEBUG)
10+
11+
12+
def extract(path: str = "xyz.csv") -> pd.DataFrame:
913
"""
1014
Extracts data from CSV, Excel, or JSON file.
11-
15+
1216
Args:
1317
path: Path to the data file (supports .csv, .xlsx, .json)
14-
18+
1519
Returns:
1620
pd.DataFrame: DataFrame containing the extracted data
17-
21+
1822
Raises:
1923
FileNotFoundError: If the file doesn't exist
2024
ValueError: If the file is empty or invalid
2125
"""
2226
# Validate file path
2327
if not os.path.exists(path):
2428
raise FileNotFoundError(f"❌ File not found: {path}")
25-
29+
2630
# Get file extension
2731
ext = os.path.splitext(path)[-1].lower()
28-
32+
2933
# Check if file format is supported
3034
if ext not in ['.csv', '.xlsx', '.xls', '.json']:
3135
raise ValueError(f"Unsupported file format: {ext}")
32-
36+
3337
try:
3438
if ext == '.csv':
3539
# Try different encodings for CSV files
3640
encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
3741
df = None
38-
42+
3943
for encoding in encodings:
4044
try:
4145
df = pd.read_csv(path, encoding=encoding)
42-
print(f"Successfully read CSV with encoding: {encoding}")
46+
logger.info(f"Successfully read CSV with encoding: {encoding}")
4347
break
4448
except UnicodeDecodeError:
45-
print(f"Failed to read with encoding '{encoding}'")
49+
logger.error(f"Failed to read with encoding '{encoding}'")
4650
continue
4751
except Exception as e:
48-
print(f"Error reading with encoding '{encoding}': {e}")
52+
logger.error(
53+
f"Error reading with encoding '{encoding}': {e}")
4954
continue
50-
55+
5156
if df is None:
52-
raise ValueError(f"Could not read CSV with tried encodings: {encodings}")
53-
57+
raise ValueError(
58+
f"Could not read CSV with tried encodings: {encodings}")
59+
5460
elif ext in ['.xls', '.xlsx']:
5561
df = pd.read_excel(path)
56-
print(f"Successfully read Excel file: {path}")
57-
62+
logger.info(f"Successfully read Excel file: {path}")
63+
5864
elif ext == '.json':
5965
df = pd.read_json(path)
60-
print(f"Successfully read JSON file: {path}")
61-
66+
logger.info(f"Successfully read JSON file: {path}")
67+
6268
# Validate data
6369
if df.empty:
6470
raise ValueError("File contains no data")
65-
66-
print(f"✅ Extracted {len(df)} rows and {len(df.columns)} columns") # TODO: Use logging instead of print
71+
72+
# TODO: Use logging instead of print
73+
logger.info(
74+
f"✅ Extracted {len(df)} rows and {len(df.columns)} columns")
6775
return df
68-
76+
6977
except pd.errors.EmptyDataError:
7078
raise ValueError("❌ File contains no data")
7179
except pd.errors.ParserError as e:

app/etl/load.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import pandas as pd
22
import sqlite3
33
import os
4+
import logging as lg
45
# TODO (Find & Fix)
6+
logger=lg.getLogger(__name__)
7+
logger.setLevel(lg.DEBUG)
8+
59

610
def load(df: pd.DataFrame, db_path: str = "etl_data.db", table_name: str = "processed_data"):
711
"""
@@ -13,10 +17,10 @@ def load(df: pd.DataFrame, db_path: str = "etl_data.db", table_name: str = "proc
1317
table_name: Name of the table to create/update
1418
"""
1519
if df.empty:
16-
print("⚠️ Warning: Empty DataFrame received, nothing to load") # TODO (Find & Fix)
20+
logger.warning("⚠️ Warning: Empty DataFrame received, nothing to load") # TODO (Find & Fix)
1721
return
1822

19-
print(f"🔄 Loading {len(df)} rows into database '{db_path}'") # TODO (Find & Fix)
23+
logger.info(f"🔄 Loading {len(df)} rows into database '{db_path}'") # TODO (Find & Fix)
2024

2125
# Ensure directory exists
2226
db_dir = os.path.dirname(db_path)

app/etl/transform.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pandas as pd
2+
import logging as lg
23
# TODO (Find & Fix)
34

45
def _remove_duplicates(df: pd.DataFrame) -> pd.DataFrame:

app/main.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import os
2-
32
from app.etl.extract import extract
43
from app.etl.transform import transform
54
from app.etl.load import load
5+
import logging as lg
6+
lg.basicConfig(level=lg.DEBUG)
7+
8+
logger = lg.getLogger(__name__)
69

710
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
811
data_path = os.path.join(BASE_DIR, "data.csv")
@@ -16,34 +19,34 @@ def run_pipeline(csv_path: str =data_path, db_path: str = "etl_data.db"):
1619
db_path: Path to the output SQLite database
1720
"""
1821
try:
19-
print("🚀 Starting ETL Pipeline") # TODO (Find & Fix): Use logging instead of print
20-
print(f"📁 Input file: {csv_path}")
21-
print(f"🗄️ Output database: {db_path}")
22-
print("-" * 50)
22+
lg.info("🚀 Starting ETL Pipeline") # TODO (Find & Fix): Use logging instead of print
23+
lg.info(f"📁 Input file: {csv_path}")
24+
lg.info(f"🗄️ Output database: {db_path}")
25+
lg.info("-" * 50)
2326

2427
# Extract
25-
print("📥 STEP 1: EXTRACT")
28+
lg.info("📥 STEP 1: EXTRACT")
2629
df = extract(csv_path)
27-
print(f"✅ Extracted {len(df)} rows")
28-
print(f"📊 Columns: {list(df.columns)}")
29-
print()
30+
lg.info(f"✅ Extracted {len(df)} rows")
31+
lg.info(f"📊 Columns: {list(df.columns)}")
32+
lg.info("")
3033

3134
# Transform
32-
print("🔄 STEP 2: TRANSFORM")
35+
lg.info("🔄 STEP 2: TRANSFORM")
3336
df_transformed = transform(df)
34-
print(f"✅ Transformed data ready")
35-
print()
37+
lg.info(f"✅ Transformed data ready")
38+
lg.info("")
3639

3740
# Load
38-
print("📤 STEP 3: LOAD")
41+
lg.info("📤 STEP 3: LOAD")
3942
load(df_transformed, db_path)
40-
print()
43+
lg.info("")
4144

42-
print("🎉 ETL Pipeline completed successfully!")
43-
print(f"📈 Final dataset: {len(df_transformed)} rows, {len(df_transformed.columns)} columns")
45+
lg.info("🎉 ETL Pipeline completed successfully!")
46+
lg.info(f"📈 Final dataset: {len(df_transformed)} rows, {len(df_transformed.columns)} columns")
4447

4548
except FileNotFoundError as e:
46-
print(f"❌ File Error: {e}")
49+
lg.error(f"❌ File Error: {e}")
4750

4851
except ValueError as e:
4952
# TODO (Find & Fix): Error handling missing

0 commit comments

Comments
 (0)