-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic_classification.py
More file actions
49 lines (41 loc) · 1.59 KB
/
basic_classification.py
File metadata and controls
49 lines (41 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""Basic example for using Octopus Classification."""
# This example demonstrates how to use Octopus to create a machine learning classification model.
# We will use the breast cancer dataset for this purpose.
# Please ensure your dataset is clean, with no missing values (`NaN`),
# and that all features are numeric.
### Necessary imports for this example
import os
from octopus.example_data import load_breast_cancer_data
from octopus.modules import Octo
from octopus.study import OctoClassification
### Load and Preprocess Data
# The example loader hands back three values: the data frame, the list used
# below as feature columns, and `targets` (printed below under "Classes").
df, features, targets = load_breast_cancer_data()

# Print a short overview of the dataset, one figure at a time.
print("Dataset info:")

n_features = len(features)
print(f" Features: {n_features} - {features}")

n_samples = df.shape[0]  # number of rows in the frame
print(f" Samples: {n_samples}")

n_classes = len(targets)
print(f" Classes: {n_classes} - {targets}")

# Count occurrences of each value in the "target" column, ordered by value.
target_counts = df["target"].value_counts().sort_index().to_dict()
print(f" Target distribution: {target_counts}")
### Create and run OctoClassification
# The study's `path` argument comes from the STUDIES_PATH environment variable,
# falling back to "./studies" when that variable is not set.
studies_root = os.environ.get("STUDIES_PATH", "./studies")

# The workflow for this example is a single Octo step.
octo_step = Octo(
    description="step1_octo_full",
    task_id=0,
    depends_on=None,  # First task, depends on input
    models=["ExtraTreesClassifier"],
    n_trials=100,  # 100 trials for hyperparameter optimization
    n_folds_inner=5,  # 5 inner folds
    max_features=30,  # Use all 30 features
    ensemble_selection=True,  # Enable ensemble selection
)

# Assemble the study: the "target" column serves both as the prediction target
# and as the stratification column; "index" identifies the samples.
study = OctoClassification(
    name="basic_classification",
    path=studies_root,
    target_metric="AUCROC",
    feature_cols=features,
    target_col="target",
    sample_id_col="index",
    stratification_col="target",
    workflow=[octo_step],
)

# Fit the study on the loaded data frame, then report completion.
study.fit(data=df)
print("Workflow completed")