Skip to content

Commit 053e06f

Browse files
author
José Mendes
committed
Format Python Files
1 parent ff6ae69 commit 053e06f

File tree

3 files changed

+42
-25
lines changed

3 files changed

+42
-25
lines changed

Airflow/dags/minio-dag.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
import pkg_resources
1111
import logging
1212

13-
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13+
logging.basicConfig(level=logging.INFO,
14+
format='%(asctime)s - %(levelname)s - %(message)s')
15+
1416

1517
def check_for_new_files():
1618
"""
@@ -32,6 +34,7 @@ def check_for_new_files():
3234
logging.info(f"New files detected: {[record[1] for record in records]}")
3335
return records
3436

37+
3538
def process_files(**kwargs):
3639
"""
3740
Processes the files detected from the database and marks them as processed.
@@ -47,7 +50,7 @@ def process_files(**kwargs):
4750
for record in records:
4851
key, value = record
4952
logging.info(f"Processing file: {key}, uploaded at {value}")
50-
53+
5154
bucketName, fileName = key.split('/', 1)
5255
logging.info(f"{bucketName},{fileName}")
5356

@@ -62,17 +65,22 @@ def process_files(**kwargs):
6265
pg_hook.run(update_sql)
6366

6467
# Function to install a library if not already installed
68+
69+
6570
def install(package, version="7.0.3"):
6671
try:
6772
# Check if the package is installed
6873
pkg_resources.get_distribution(package)
6974
logging.info(f"{package} is already installed.")
7075
except pkg_resources.DistributionNotFound:
7176
logging.info(f"{package} not found. Installing version {version}...")
72-
subprocess.check_call([sys.executable, "-m", "pip", "install", f"{package}=={version}"])
77+
subprocess.check_call(
78+
[sys.executable, "-m", "pip", "install", f"{package}=={version}"])
79+
7380

7481
def fileProcessor(bucket_name, object_name):
75-
install('minio', version="7.0.3") # Install compatible Minio version if needed
82+
# Install compatible Minio version if needed
83+
install('minio', version="7.0.3")
7684
from minio import Minio
7785
from minio.error import S3Error
7886

@@ -97,7 +105,8 @@ def fileProcessor(bucket_name, object_name):
97105
try:
98106
# Download the object from MinIO to a local file
99107
minio_client.fget_object(bucket_name, object_name, local_file_path)
100-
logging.info(f"File '{object_name}' successfully downloaded from MinIO.")
108+
logging.info(
109+
f"File '{object_name}' successfully downloaded from MinIO.")
101110

102111
# Process the file here
103112
sendProcessedInfoToDb(local_file_path)
@@ -111,13 +120,15 @@ def fileProcessor(bucket_name, object_name):
111120
except Exception as e:
112121
logging.info(f"Unexpected error: {e}")
113122

123+
114124
def sendProcessedInfoToDb(localFilePath):
115125
dataCoffeePrice = pd.read_csv(localFilePath)
116126

117-
coffeePrice = dataCoffeePrice ["value"]
118-
atTime = dataCoffeePrice ["date"]
127+
coffeePrice = dataCoffeePrice["value"]
128+
atTime = dataCoffeePrice["date"]
119129

120-
logging.info(f"The coffee price is {coffeePrice.iloc[0]} at date {atTime.iloc[0]}.")
130+
logging.info(
131+
f"The coffee price is {coffeePrice.iloc[0]} at date {atTime.iloc[0]}.")
121132

122133
pg_hook = PostgresHook(postgres_conn_id='postgres')
123134

@@ -130,7 +141,8 @@ def sendProcessedInfoToDb(localFilePath):
130141
pg_hook.run(insertSql)
131142

132143
logging.info("Newest price instance added to the coffee_price table")
133-
144+
145+
134146
# Define default arguments for the DAG
135147
default_args = {
136148
'owner': 'your_name',
@@ -165,5 +177,3 @@ def sendProcessedInfoToDb(localFilePath):
165177

166178
# Define task dependencies
167179
check_files_task >> process_files_task
168-
169-

src/download_from_minio.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from minio import Minio
22
from minio.error import S3Error
33

4+
45
def download_from_minio(bucket_name, object_name):
56
minio_client = Minio(
67
"minio:9000", # MinIO server address
@@ -21,4 +22,4 @@ def download_from_minio(bucket_name, object_name):
2122
minio_client.fget_object(bucket_name, object_name, object_name)
2223
print(f"File {object_name} successfully downloaded from MinIO.")
2324
except S3Error as e:
24-
print(f"Error occurred: {e}")
25+
print(f"Error occurred: {e}")

src/main.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def connect_to_postgres():
1515
print("Connected to Postgres")
1616
return conn
1717

18+
1819
def create_tables(conn):
1920
cursor = conn.cursor()
2021

@@ -43,7 +44,7 @@ def create_tables(conn):
4344
consumption FLOAT
4445
);
4546
""")
46-
47+
4748
cursor.execute("""
4849
CREATE TABLE IF NOT EXISTS coffee_production (
4950
id SERIAL PRIMARY KEY,
@@ -52,16 +53,17 @@ def create_tables(conn):
5253
production FLOAT
5354
);
5455
""")
55-
56+
5657
cursor.execute("""
5758
CREATE TABLE IF NOT EXISTS coffee_price(
5859
date DATE,
5960
price REAL
6061
);
6162
""")
62-
63+
6364
conn.commit()
6465

66+
6567
def populate_tables(conn, filenames):
6668

6769
cursor = conn.cursor()
@@ -70,7 +72,8 @@ def populate_tables(conn, filenames):
7072
dataFrameProduction = pd.read_csv(filenames[1])
7173
country_number = 0
7274
for country in dataFrameComsumption["Country"]:
73-
cursor.execute("INSERT INTO countries (country, coffeeType) VALUES (%s, %s) ON CONFLICT (country) DO NOTHING RETURNING country_id", (country, dataFrameComsumption["Coffee type"][country_number]))
75+
cursor.execute("INSERT INTO countries (country, coffeeType) VALUES (%s, %s) ON CONFLICT (country) DO NOTHING RETURNING country_id",
76+
(country, dataFrameComsumption["Coffee type"][country_number]))
7477

7578
result = cursor.fetchone() # Get the inserted country_id
7679
if result is None:
@@ -81,26 +84,29 @@ def populate_tables(conn, filenames):
8184
country_id = result[0]
8285

8386
years = dataFrameComsumption.columns[2:-1]
84-
87+
8588
for year in years:
86-
cursor.execute ("INSERT INTO coffee_domestic_consumption (country_id, year, consumption) VALUES (%s, %s, %s)", (country_id , year, int(dataFrameComsumption[year][country_number])))
89+
cursor.execute("INSERT INTO coffee_domestic_consumption (country_id, year, consumption) VALUES (%s, %s, %s)",
90+
(country_id, year, int(dataFrameComsumption[year][country_number])))
8791

88-
cursor.execute ("INSERT INTO coffee_production (country_id, year, production) VALUES (%s, %s, %s)", (country_id , year, int(dataFrameProduction[year][country_number])))
92+
cursor.execute("INSERT INTO coffee_production (country_id, year, production) VALUES (%s, %s, %s)",
93+
(country_id, year, int(dataFrameProduction[year][country_number])))
8994

9095
country_number += 1
91-
96+
9297
conn.commit()
9398

99+
94100
if __name__ == "__main__":
95101

96-
fileNames =["Coffee_domestic_consumption.csv", "Coffee_production.csv"]
97-
102+
fileNames = ["Coffee_domestic_consumption.csv", "Coffee_production.csv"]
103+
98104
for fileName in fileNames:
99105
if not os.path.exists(fileName):
100-
download_from_minio("coffee-dataset-example", fileName) # Update with your MinIO bucket name
101-
106+
# Update with your MinIO bucket name
107+
download_from_minio("coffee-dataset-example", fileName)
102108

103109
connection = connect_to_postgres()
104110
create_tables(connection)
105-
111+
106112
populate_tables(connection, fileNames)

0 commit comments

Comments
 (0)