-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
107 lines (81 loc) · 2.99 KB
/
main.py
File metadata and controls
107 lines (81 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
from dotenv import load_dotenv
def run_script(script_name: str, description: str) -> bool:
    """Import one pipeline step module and run its ``main()``.

    Args:
        script_name: Module name of the step to run. Only ``"ingest"``,
            ``"embed_and_ingest"``, ``"detect_distance"`` and
            ``"detect_dbscan"`` are accepted.
        description: Human-readable step name used in progress messages.

    Returns:
        ``True`` when the step's ``main()`` ran without raising and did not
        return ``False``. ``False`` for an unknown ``script_name``, for any
        ``Exception`` raised while importing or running the step, or when
        ``main()`` returns exactly ``False``.
    """
    # Local import: keeps this function self-contained, matching the
    # function-scope imports it already relied on for the step modules.
    import importlib

    # Allow-list of importable steps. A tuple (not a set) so that an
    # unhashable ``script_name`` is simply reported as unknown.
    known_scripts = (
        "ingest",
        "embed_and_ingest",
        "detect_distance",
        "detect_dbscan",
    )

    print(f"Running {description}...")

    if script_name not in known_scripts:
        print(f"Unknown script: {script_name}")
        return False

    try:
        # The step module is imported only when the step is actually run, so
        # an import error in one step is reported as that step's failure.
        step_module = importlib.import_module(script_name)
        result = step_module.main()
    except Exception as e:
        # Top-level boundary for a step: report and signal failure instead
        # of letting one step's error crash the whole pipeline.
        print(f"{description} failed: {e}")
        print()
        return False

    # Only an explicit False counts as failure; None (no return value) and
    # any other result are treated as success.
    if result is False:
        print(f"{description} failed!")
        print()
        return False

    print(f"{description} completed successfully!")
    print()
    return True
def check_requirements() -> bool:
    """Verify that the pipeline's configuration is available.

    Checks, in order, that a ``.env`` path exists in the current working
    directory and that ``OPENAI_API_KEY``, ``QDRANT_URL`` and
    ``QDRANT_API_KEY`` are all set to non-empty values after
    ``load_dotenv()`` has been called.

    Returns:
        ``True`` when every check passes, ``False`` (after printing the
        reason) otherwise.
    """
    print("Checking requirements...")

    env_file_present = os.path.exists(".env")
    if not env_file_present:
        print("Error: .env file not found!")
        print("Create .env with OPENAI_API_KEY, QDRANT_URL, and QDRANT_API_KEY")
        return False

    # Presumably populates os.environ from .env (python-dotenv); the lookups
    # below rely on that having happened first.
    load_dotenv()

    needed = ("OPENAI_API_KEY", "QDRANT_URL", "QDRANT_API_KEY")
    # An unset variable and an empty-string value are both treated as missing.
    absent = [name for name in needed if not os.getenv(name)]

    if not absent:
        print("Requirements check passed!")
        print()
        return True

    print(f"Missing environment variables: {', '.join(absent)}")
    return False
def main() -> bool:
    """Run the HDFS anomaly detection pipeline end to end.

    Steps, in order: requirements check, data ingestion, embedding creation
    and upload, then a single detection step. The detection step is chosen
    by the ``DETECTION_METHOD`` environment variable: ``"dbscan"``
    (case-insensitive) selects DBSCAN; any other value, or no value, selects
    the distance-based detector.

    Returns:
        ``True`` when every step succeeded, ``False`` as soon as one step
        fails, so callers and the process exit status can tell a failed run
        from a successful one.
    """
    banner = "=" * 70
    print(banner)
    print("HDFS ANOMALY DETECTION PIPELINE")
    print(banner)
    print("Dataset: 2000 points (80% normal, 20% abnormal)")
    print("Model: OpenAI text-embedding-3-small (1536D)")

    # check_requirements() prints its own diagnostics on failure.
    if not check_requirements():
        return False

    if not run_script("ingest", "Data Ingestion"):
        print("Pipeline failed at data ingestion step")
        return False

    if not run_script("embed_and_ingest", "Embedding Creation and Qdrant Upload"):
        print("Pipeline failed at embedding step")
        return False

    # Read only after check_requirements() has called load_dotenv(), so a
    # value defined in .env is presumably visible here as well.
    detection_method = os.getenv("DETECTION_METHOD", "distance").lower()
    use_dbscan = detection_method == "dbscan"

    if use_dbscan:
        if not run_script("detect_dbscan", "DBSCAN-based Anomaly Detection"):
            print("Pipeline failed at DBSCAN detection step")
            return False
    else:
        if not run_script("detect_distance", "Distance-based Anomaly Detection"):
            print("Pipeline failed at distance detection step")
            return False

    print("PIPELINE COMPLETED SUCCESSFULLY!")
    print(banner)
    print("Output Files:")
    if use_dbscan:
        print(" visualization/dbscan_detection_results.html")
    else:
        print(" visualization/distance_detection_results.html")
    print(" data/balanced_dataset.txt")
    return True


if __name__ == "__main__":
    # Propagate the outcome to the shell: exit status 0 on success, 1 when
    # any pipeline step failed.
    raise SystemExit(0 if main() else 1)