Merge pull request Jacoo-Zhao#14 from Jacoo-Zhao/feature-cicd

Jacoo-Zhao · web-flow · commit 9a36cd005ad2 · 2025-05-08T18:28:29.000+10:00
fix: api server; pipeline refine
diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
@@ -34,4 +34,8 @@ jobs:
 
       - name: Run pipeline
         run: |
+          # clearml-agent daemon --queue "task" --detached
+          # python main.py
           python s1_dataset_artifact.py
+
+          
diff --git a/pipeline_from_tasks.py b/pipeline_from_tasks.py
@@ -30,7 +30,7 @@ def run_pipeline():
     pipe = PipelineController(
         name="AI_Studio_Pipeline_Demo", project="AI_Studio_Demo", version="0.0.1", add_pipeline_tags=False
     )
-    #
+
     # pipe.add_parameter(
     #     "url",
     #     "dataset_url",
@@ -72,9 +72,9 @@ def run_pipeline():
     )
 
     # for debugging purposes use local jobs
-    # pipe.start_locally()
+    pipe.start_locally()
 
     # Starting the pipeline (in the background)
     # pipe.start(queue="task")
-    pipe.start(queue="pipeline_controller")
-    print("done")
+    # pipe.start(queue="pipeline_controller")
+    # print("done")
diff --git a/s1_dataset_artifact.py b/s1_dataset_artifact.py
@@ -7,21 +7,14 @@
 task = Task.init(project_name="AI_Studio_Demo", task_name="Pipeline step 1 dataset artifact")
 
 # only create the task, we will actually execute it later
-# task.execute_remotely()
+task.execute_remotely()
 
-# Check if the local dataset file exists
-local_iris_csv_path = 'work_dataset/Iris.csv'
-if not os.path.exists(local_iris_csv_path):
-    print(f"Local file '{local_iris_csv_path}' not found. Downloading...")
-    local_iris_pkl = StorageManager.get_local_copy(
-        remote_url='https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl'
-    )
-else:
-    print(f"Using existing local file: '{local_iris_csv_path}'")
+
+local_iris_pkl = StorageManager.get_local_copy(remote_url='https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl')
 
 # Add and upload the dataset file
-task.upload_artifact('dataset', artifact_object=local_iris_csv_path)
-print('uploading artifacts in the background')
+# task.upload_artifact('dataset', artifact_object=local_iris_csv_path)
+task.upload_artifact('dataset', artifact_object=local_iris_pkl)
 
-# we are done
-print('Done')
+print('uploading artifacts in the background')
+print('Done🔥')
diff --git a/s2_data_preprocessing.py b/s2_data_preprocessing.py
@@ -1,19 +1,16 @@
 import pickle
 from clearml import Task, StorageManager
 from sklearn.model_selection import train_test_split
-import pandas as pd
-
+# import pandas as pd
+#
 # Connecting ClearML with the current process,
 # from here on everything is logged automatically
 task = Task.init(project_name="AI_Studio_Demo", task_name="Pipeline step 2 process dataset")
 
 # program arguments
-# Use either dataset_task_id to point to a tasks artifact or
-# use a direct url with dataset_url
+# Use either dataset_task_id to point to a task's artifact or use a direct URL with dataset_url
 args = {
-    'dataset_task_id': '3ceaa4409a70486b846e35bcbf229eab', #update id if it needs running locally
-    # 'dataset_task_id': '',  # update id if it needs running locally
-    'dataset_url': '',
+    'dataset_task_id': '',
     'random_state': 42,
     'test_size': 0.2,
 }
@@ -23,31 +20,24 @@
 print('Arguments: {}'.format(args))
 
 # only create the task, we will actually execute it later
-# task.execute_remotely()
+task.execute_remotely()
+#
 
 # get dataset from task's artifact
 if args['dataset_task_id']:
     dataset_upload_task = Task.get_task(task_id=args['dataset_task_id'])
     print('Input task id={} artifacts {}'.format(args['dataset_task_id'], list(dataset_upload_task.artifacts.keys())))
     # download the artifact
-    iris_csv = dataset_upload_task.artifacts['dataset'].get_local_copy()
-# # get the dataset from a direct url
-# elif args['dataset_url']:
-#     iris_pickle = StorageManager.get_local_copy(remote_url=args['dataset_url'])
+    iris_pickle = dataset_upload_task.artifacts['dataset'].get_local_copy()
 else:
     raise ValueError("Missing dataset link")
 
-iris_df = pd.read_csv(iris_csv)
-
+# open the local copy
+iris = pickle.load(open(iris_pickle, 'rb'))
 
 # "process" data
-# Extract features (X) and target (y)
-X = iris_df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']].values
-y = iris_df['Species'].astype('category').cat.codes.values  # Convert species to numeric codes
-
-species_mapping = dict(enumerate(iris_df['Species'].astype('category').cat.categories))
-print(species_mapping)
-
+X = iris.data
+y = iris.target
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=args['test_size'], random_state=args['random_state'])
 
@@ -59,4 +49,4 @@
 task.upload_artifact('y_test', y_test)
 
 print('Notice, artifacts are uploaded in the background')
-print('Done')
+print('Done🔥')
diff --git a/s3_train_model.py b/s3_train_model.py
@@ -1,32 +1,119 @@
+
+# task.connect(args)
+#
+# # only create the task, we will actually execute it later
+# # task.execute_remotely() # After passing local testing, you should uncomment this command to initial task to ClearML
+#
+# print('Retrieving Iris dataset')
+# dataset_task = Task.get_task(task_id=args['dataset_task_id'])
+# X_train = dataset_task.artifacts['X_train'].get()
+# X_test = dataset_task.artifacts['X_test'].get()
+# y_train = dataset_task.artifacts['y_train'].get()
+# y_test = dataset_task.artifacts['y_test'].get()
+# print('Iris dataset loaded')
+#
+#
+# # Define a simple neural network
+# class SimpleNN(nn.Module):
+#     def __init__(self, input_size, num_classes):
+#         super(SimpleNN, self).__init__()
+#         self.fc1 = nn.Linear(input_size, 50)
+#         self.fc2 = nn.Linear(50, num_classes)
+#
+#     def forward(self, x):
+#         x = torch.relu(self.fc1(x))
+#         x = self.fc2(x)
+#         return x
+#
+# # Convert data to PyTorch tensors
+# X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
+# y_train_tensor = torch.tensor(y_train, dtype=torch.long)
+# X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
+# y_test_tensor = torch.tensor(y_test, dtype=torch.long)
+#
+# # Create DataLoader
+# train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
+# train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)
+# # Hyperparameters
+# # Initialize the model, loss function, and optimizer
+# model = SimpleNN(input_size=X_train.shape[1], num_classes=len(set(y_train)))
+# criterion = nn.CrossEntropyLoss()
+# optimizer = optim.Adam(
+#     model.parameters(),
+#     lr=args['learning_rate'],
+#     weight_decay=args['weight_decay']
+# )
+#
+# for epoch in tqdm(range(args['num_epochs']), desc='Training Epochs'):
+#     epoch_loss = 0.0
+#
+#     for inputs, labels in train_loader:
+#         optimizer.zero_grad()
+#         outputs = model(inputs)
+#         loss = criterion(outputs, labels)
+#         loss.backward()
+#         optimizer.step()
+#
+#         # accumulate the loss
+#         epoch_loss += loss.item()
+#
+#     avg_loss = epoch_loss / len(train_loader)
+#     logger.report_scalar(title='train', series='epoch_loss', value=avg_loss, iteration=epoch)
+#
+# # Save model
+# model_path = 'assets/model.pkl'
+# torch.save(model.state_dict(), model_path)
+# task.upload_artifact(name='model', artifact_object=model_path)
+# print('Model saved and uploaded as artifact')
+#
+# # Load model for evaluation
+# model.load_state_dict(torch.load(model_path))
+# model.eval()
+# with torch.no_grad():
+#     outputs = model(X_test_tensor)
+#     _, predicted = torch.max(outputs, 1)
+#     accuracy = (predicted == y_test_tensor).float().mean().item()
+#     logger.report_scalar("validation_accuracy", "score", value=accuracy, iteration=0)
+#
+# print(f'Model trained & stored with accuracy: {accuracy:.4f}')
+#
+#
+# # Plotting confusion matrix
+# species_mapping = {0: 'Setosa', 1: 'Versicolor', 2: 'Virginica'}
+# y_test_names = [species_mapping[label.item()] for label in y_test]
+# predicted_names = [species_mapping[label.item()] for label in predicted]
+#
+# cm = confusion_matrix(y_test_names, predicted_names, labels=list(species_mapping.values()))
+# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(species_mapping.values()))
+# disp.plot(cmap=plt.cm.Blues)
+#
+# plt.title('Confusion Matrix')
+# plt.savefig('figs/confusion_matrix.png')
+#
+# print('Confusion matrix plotted and saved as confusion_matrix.png')
+
 import matplotlib.pyplot as plt
+import numpy as np
 from clearml import Task, Logger
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader, TensorDataset
-from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
-from tqdm import tqdm
+
 
 # Connecting ClearML with the current process,
+# from here on everything is logged automatically
 task = Task.init(project_name="AI_Studio_Demo", task_name="Pipeline step 3 train model")
 logger = Logger.current_logger()
 
+# Arguments
 args = {
-    'dataset_task_id': '86f09f66d88c4ec7819f05b086deea15', # update id if it needs running locally
-    'num_epochs': 20,
-    'batch_size': 16,
-    'dataset_task_id': '',
-
-    # ✅ HPO
-    'learning_rate': 1e-3,
-    'weight_decay': 1e-5,
+    'dataset_task_id': '', # replace the value only when you need to debug locally
 }
-
 task.connect(args)
 
 # only create the task, we will actually execute it later
-# task.execute_remotely() # After passing local testing, you should uncomment this command to initial task to ClearML
-
+task.execute_remotely()  # Comment this out while testing locally; once that passes, re-enable it so the task is initialized in ClearML
 
 print('Retrieving Iris dataset')
 dataset_task = Task.get_task(task_id=args['dataset_task_id'])
@@ -36,7 +123,6 @@
 y_test = dataset_task.artifacts['y_test'].get()
 print('Iris dataset loaded')
 
-
 # Define a simple neural network
 class SimpleNN(nn.Module):
     def __init__(self, input_size, num_classes):
@@ -57,61 +143,51 @@ def forward(self, x):
 
 # Create DataLoader
 train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
-train_loader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)
-# Hyperparameters
+train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+
 # Initialize the model, loss function, and optimizer
 model = SimpleNN(input_size=X_train.shape[1], num_classes=len(set(y_train)))
 criterion = nn.CrossEntropyLoss()
-optimizer = optim.Adam(
-    model.parameters(),
-    lr=args['learning_rate'],
-    weight_decay=args['weight_decay']
-)
-
-for epoch in tqdm(range(args['num_epochs']), desc='Training Epochs'):
-    epoch_loss = 0.0
+optimizer = optim.Adam(model.parameters(), lr=0.001)
 
+# Train the model
+num_epochs = 20
+for epoch in range(num_epochs):
     for inputs, labels in train_loader:
         optimizer.zero_grad()
         outputs = model(inputs)
         loss = criterion(outputs, labels)
         loss.backward()
         optimizer.step()
+        logger.report_scalar(title='train', series='loss', value=loss.item(), iteration=epoch)
 
-        # 累积 loss
-        epoch_loss += loss.item()
-
-    avg_loss = epoch_loss / len(train_loader)
-    logger.report_scalar(title='train', series='epoch_loss', value=avg_loss, iteration=epoch)
-
-# Save model
-model_path = 'assets/model.pkl'
-torch.save(model.state_dict(), model_path)
-task.upload_artifact(name='model', artifact_object=model_path)
-print('Model saved and uploaded as artifact')
-
-# Load model for evaluation
-model.load_state_dict(torch.load(model_path))
+# Evaluate the model
 model.eval()
 with torch.no_grad():
     outputs = model(X_test_tensor)
     _, predicted = torch.max(outputs, 1)
     accuracy = (predicted == y_test_tensor).float().mean().item()
-    logger.report_scalar("validation_accuracy", "score", value=accuracy, iteration=0)
 
 print(f'Model trained & stored with accuracy: {accuracy:.4f}')
 
+# Plot the test samples by sepal length and width, colored by class label
+x_min, x_max = X_test[:, 0].min() - .5, X_test[:, 0].max() + .5
+y_min, y_max = X_test[:, 1].min() - .5, X_test[:, 1].max() + .5
+h = .02  # step size in the mesh
+xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
+plt.figure(1, figsize=(4, 3))
+
 
-# Plotting confusion matrix
-species_mapping = {0: 'Setosa', 1: 'Versicolor', 2: 'Virginica'}
-y_test_names = [species_mapping[label.item()] for label in y_test]
-predicted_names = [species_mapping[label.item()] for label in predicted]
+plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, edgecolors='k', cmap=plt.cm.Paired)
+plt.xlabel('Sepal length')
+plt.ylabel('Sepal width')
 
-cm = confusion_matrix(y_test_names, predicted_names, labels=list(species_mapping.values()))
-disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(species_mapping.values()))
-disp.plot(cmap=plt.cm.Blues)
+plt.xlim(xx.min(), xx.max())
+plt.ylim(yy.min(), yy.max())
+plt.xticks(())
+plt.yticks(())
 
-plt.title('Confusion Matrix')
-plt.savefig('figs/confusion_matrix.png')
+plt.title('Iris Types')
+plt.savefig('iris_plot.png')
 
-print('Confusion matrix plotted and saved as confusion_matrix.png')
+print('Done🔥')