Add a 'plotting' config option that makes the matplotlib/seaborn imports optional (refs #15)

greninja · greninja · commit 0f28b184081a · 2017-01-02T23:00:50.000+05:30
diff --git a/conf/cuckooml.conf b/conf/cuckooml.conf
@@ -38,3 +38,6 @@ compare_new_samples = true
 
 # Set folder for samples to be compared against clustering
 test_directory = sample_data/test
+
+# Enable plotting? When true, matplotlib and seaborn are imported for figures.
+plotting = true
diff --git a/modules/processing/cuckooml.py b/modules/processing/cuckooml.py
@@ -15,11 +15,18 @@
 from lib.cuckoo.common.constants import CUCKOO_ROOT
 from math import log
 
+if Config("cuckooml").cuckooml.plotting:  # import plotting libraries only when the 'plotting' option is enabled
+    try:
+        import matplotlib.pyplot as plt
+        import seaborn as sns
+    except ImportError, e:  # a failed import is reported on stderr but is not fatal
+        print >> sys.stderr, "Some error while importing"
+        print >> sys.stderr, e  # NOTE(review): plt/sns stay unbound after a failure; the later plotting checks only look at the config flag - confirm
+
+        
 try:
-    import matplotlib.pyplot as plt
     import numpy as np
     import pandas as pd
-    import seaborn as sns
     from hdbscan import HDBSCAN
     from sklearn import metrics
     from sklearn.cluster import DBSCAN
@@ -797,6 +804,14 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1,
 
     def detect_abnormal_behaviour(self, count_dataset=None, figures=True):
         """Detect samples that behave significantly different than others."""
+        
+        # Safety check for plotting
+        if not Config("cuckooml").cuckooml.plotting and figures:
+            print >> sys.stderr, "Warning: 'plotting' and 'figures' do not match. \
+                                  Plotting modules might not be imported."
+            figures = False
+
+
         if count_dataset is None:
             # Pull all count features
             count_features = self.feature_category(":count:")
@@ -1133,6 +1148,14 @@ def performance_metric(clustering, labels, data, noise):
 
     def clustering_label_distribution(self, clustering, labels, plot=False):
         """Get statistics about number of ground truth labels per cluster."""
+        
+        # Safety check for plotting
+        if not Config("cuckooml").cuckooml.plotting and plot:
+            print >> sys.stderr, "Warning: 'plotting' and 'plot' do not match.\
+                                  Plotting modules might not be imported."
+            plot = False
+
+
         cluster_ids = set(clustering["label"].tolist())
         labels_ids = set(labels["label"].tolist())
         cluster_distribution = {}