diff --git a/Pilot1/NT3/make_csv.py b/Pilot1/NT3/make_csv.py
new file mode 100644
index 00000000..4b6b1f79
--- /dev/null
+++ b/Pilot1/NT3/make_csv.py
@@ -0,0 +1,62 @@
+import pandas as pd
+import pickle
+import argparse
+import glob, os
+from pathlib import Path
+import matplotlib.pyplot as plt
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-f", type=str, help="Run folder")
+    parser.add_argument("-c1", type=str, help="cluster 1 name")
+    parser.add_argument("-c2", type=str, help="cluster 2 name")
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = get_args()
+    l1 = []
+    l2 = []
+    runs = glob.glob(args.f + "/EXP000/*/")
+    print(runs)
+    for r in runs:
+        print(r)
+        global_data = pd.read_csv(r + "training.log")
+        val_abs = global_data['val_abstention'].iloc[-1]
+        val_abs_acc = global_data['val_abstention_acc'].iloc[-1]
+        if os.path.exists(r + "cluster_trace.pkl"):
+            cluster_data = pickle.load(open(r + "cluster_trace.pkl", "rb"))
+        else:
+            continue
+        polluted_abs = cluster_data['Abs polluted']
+        val_abs_cluster = cluster_data['Abs val cluster']
+        val_abs_acc_cluster = cluster_data['Abs val acc']
+        ratio = float(r[-8:-5])  # noise fraction encoded in the run directory name
+        if args.c1 in r:
+            l1.append([ratio, val_abs, val_abs_acc, val_abs_cluster, val_abs_acc_cluster, polluted_abs])
+        elif args.c2 in r:
+            l2.append([ratio, val_abs, val_abs_acc, val_abs_cluster, val_abs_acc_cluster, polluted_abs])
+
+    df1 = pd.DataFrame(l1, columns=['Noise Fraction', 'Val Abs', 'Val Abs Acc', 'Val Abs Cluster', 'Val Abs Acc Cluster', 'Polluted Abs'])
+    df2 = pd.DataFrame(l2, columns=['Noise Fraction', 'Val Abs', 'Val Abs Acc', 'Val Abs Cluster', 'Val Abs Acc Cluster', 'Polluted Abs'])
+    print(df1)
+    df1.to_csv("cluster_1.csv")
+    df2.to_csv("cluster_2.csv")
+    plt.plot(df1['Noise Fraction'], df1['Val Abs'], marker='o', label='Val Abs')
+    plt.plot(df1['Noise Fraction'], df1['Val Abs Acc'], marker='o', label='Val Abs Acc')
+    plt.plot(df1['Noise Fraction'], df1['Val Abs Cluster'], marker='o', label='Val Abs Cluster')
+    plt.plot(df1['Noise Fraction'], df1['Val Abs Acc Cluster'], marker='o', label='Val Abs Acc Cluster')
+    plt.xlabel("Noise fraction")
+    plt.legend()
+    plt.savefig('c1.png')
+
+    plt.figure()  # start a new figure so c2.png does not also contain the c1 curves
+    plt.plot(df2['Noise Fraction'], df2['Val Abs'], marker='o', label='Val Abs')
+    plt.plot(df2['Noise Fraction'], df2['Val Abs Acc'], marker='o', label='Val Abs Acc')
+    plt.plot(df2['Noise Fraction'], df2['Val Abs Cluster'], marker='o', label='Val Abs Cluster')
+    plt.plot(df2['Noise Fraction'], df2['Val Abs Acc Cluster'], marker='o', label='Val Abs Acc Cluster')
+    plt.xlabel("Noise Fraction")
+    plt.legend()
+    plt.savefig('c2.png')
+if __name__ == "__main__":
+    main()
diff --git a/Pilot1/NT3/nt3_abstention_keras2.py b/Pilot1/NT3/nt3_abstention_keras2.py
index 7563d258..ff66aa4b 100644
--- a/Pilot1/NT3/nt3_abstention_keras2.py
+++ b/Pilot1/NT3/nt3_abstention_keras2.py
@@ -16,6 +16,7 @@
 import nt3 as bmk
 import candle
+import pickle
 
 additional_definitions = abs_definitions
 
@@ -51,7 +52,13 @@ def initialize_parameters(default_model='nt3_noise_model.txt'):
     gParameters = candle.finalize_parameters(nt3Bmk)
 
     return gParameters
-
+
+def load_data_cf(cf_path):
+    # Pickle file holds the train/test split and the counterfactual (cf) index info
+    print("Loading data...")
+    X_train, X_test, Y_train, Y_test, polluted_inds, cluster_inds = pickle.load(open(cf_path, 'rb'))
+    print('done')
+    return X_train, Y_train, X_test, Y_test, polluted_inds, cluster_inds
 
 def load_data(train_path, test_path, gParameters):
 
@@ -86,6 +93,38 @@ def load_data(train_path, test_path, gParameters):
     return X_train, Y_train, X_test, Y_test
+def evaluate_cf(model, nb_classes, output_dir, X_train, X_test, Y_train, Y_test, polluted_inds, cluster_inds, gParameters):
+    if len(polluted_inds) > 0:
+        y_pred = model.predict(X_test)
+        abstain_inds = []
+        for i in range(y_pred.shape[0]):
+            if np.argmax(y_pred[i]) == nb_classes:
+                abstain_inds.append(i)
+
+        # Cluster indices and polluted indices are with respect to the entire train + test dataset,
+        # whereas y_pred only covers the test dataset, so add an offset for correct indexing
+        offset_testset = Y_train.shape[0]
+        abstain_inds = [i + offset_testset for i in abstain_inds]
+        polluted_percentage = np.sum([el in polluted_inds for el in abstain_inds]) / np.max([len(abstain_inds), 1])
+        print("Percentage of abstained samples that were polluted {}".format(polluted_percentage))
+
+        cluster_inds_test = list(filter(lambda ind: ind >= offset_testset, cluster_inds))
+        cluster_inds_test_abstain = [el in abstain_inds for el in cluster_inds_test]
+        cluster_percentage = np.sum(cluster_inds_test_abstain) / len(cluster_inds_test)
+        print("Percentage of cluster (in test set) that was abstained {}".format(cluster_percentage))
+
+        unabstain_inds = []
+        for i in range(y_pred.shape[0]):
+            if np.argmax(y_pred[i]) != nb_classes and (i + offset_testset in cluster_inds_test):
+                unabstain_inds.append(i)
+        # Make sure the number of unabstained indices in the cluster test set plus the number of abstained
+        # indices in the cluster test set equals the number of cluster indices in the test set
+        assert len(unabstain_inds) + np.sum(cluster_inds_test_abstain) == len(cluster_inds_test)
+        score_cluster = 1 if len(unabstain_inds) == 0 else model.evaluate(X_test[unabstain_inds], Y_test[unabstain_inds])[1]
+        print("Accuracy of unabstained cluster {}".format(score_cluster))
+        if gParameters['noise_save_cf']:
+            pickle.dump({'Abs polluted': polluted_percentage, 'Abs val cluster': cluster_percentage, 'Abs val acc': score_cluster}, open("{}/cluster_trace.pkl".format(output_dir), "wb"))
 
 
 def run(gParameters):
 
@@ -96,7 +135,10 @@ def run(gParameters):
     train_file = candle.get_file(file_train, url + file_train, cache_subdir='Pilot1')
     test_file = candle.get_file(file_test, url + file_test, cache_subdir='Pilot1')
 
-    X_train, Y_train, X_test, Y_test = load_data(train_file, test_file, gParameters)
+    if gParameters['noise_cf'] is not None:
+        X_train, Y_train, X_test, Y_test, polluted_inds, cluster_inds = load_data_cf(gParameters['noise_cf'])
+    else:
+        X_train, Y_train, X_test, Y_test = load_data(train_file, test_file, gParameters)
 
     # only training set has noise
     X_train, Y_train = candle.add_noise(X_train, Y_train, gParameters)
 
@@ -274,6 +316,8 @@ def run(gParameters):
 
     score = model.evaluate(X_test, Y_test, verbose=0)
 
+    if gParameters['noise_cf'] is not None:
+        evaluate_cf(model, nb_classes, output_dir, X_train, X_test, Y_train, Y_test, polluted_inds, cluster_inds, gParameters)
     alpha_trace = open(output_dir + "/alpha_trace", "w+")
     for alpha in abstention_cbk.alphavalues:
         alpha_trace.write(str(alpha) + '\n')
diff --git a/Pilot1/NT3/nt3_baseline_keras2.py b/Pilot1/NT3/nt3_baseline_keras2.py
index 3eceba0a..4cb17472 100644
--- a/Pilot1/NT3/nt3_baseline_keras2.py
+++ b/Pilot1/NT3/nt3_baseline_keras2.py
@@ -15,9 +15,9 @@
 import nt3 as bmk
 import candle
+import pickle
 
-
-def initialize_parameters(default_model='nt3_default_model.txt'):
+def initialize_parameters(default_model='nt3_noise_model.txt'):
 
     # Build benchmark object
     nt3Bmk = bmk.BenchmarkNT3(
@@ -238,6 +238,10 @@ def run(gParameters):
     print("json %s: %.2f%%" % (loaded_model_json.metrics_names[1], score_json[1] * 100))
+
+    if gParameters['noise_save_cf']:
+        model.save('{}/{}.autosave.model'.format(output_dir, model_name))
+        pickle.dump([X_train, X_test, Y_train, Y_test], open('{}/{}.autosave.data.pkl'.format(output_dir, model_name), "wb"))
 
     return history
diff --git a/Pilot1/NT3/nt3_cf/README.md b/Pilot1/NT3/nt3_cf/README.md
new file mode 100644
index 00000000..284a1bb3
--- /dev/null
+++ b/Pilot1/NT3/nt3_cf/README.md
@@ -0,0 +1,30 @@
+NT3 with counterfactuals:
+Code to generate counterfactual examples given an input model and a dataset in pkl format. \
+It thresholds and clusters the counterfactuals, then injects the resulting noise into the dataset. \
+Workflow:
+1) Generate counterfactuals using cf_nb.py
+```
+python cf_nb.py
+```
+
+2) Create threshold pickle files using threshold.py (provide a threshold value between 0 and 1, see --help)
+```
+python threshold.py -d ../nt3.autosave.data.pkl -c cf_redo_all_reformat.pkl -t 0.9 -o threshold_0.9.pkl
+```
+
+3) Cluster the threshold files using gen_clusters.py
+```
+python gen_clusters.py -t_value 0.9 -t threshold_0.9.pkl
+```
+
+4) Inject noise into the dataset using inject_noise.py (provide a scale value to modify the amplitude of the noise, see --help)
+```
+python inject_noise.py -t threshold_0.9.pkl -c1 cf_class_0_cluster0.pkl -c2 cf_class_1_cluster0.pkl -scale 1.0 -r True -d ../nt3.autosave.data.pkl -f cf_failed_inds.pkl -o noise_data
+```
+
+Abstention with counterfactuals:
+Code is located in abstention/
+Workflow:
+1) Run the abstention model with nt3_abstention_keras2_cf.py, passing in a pickle file containing X (with noise) and y (the output of step 4 above)
+2) For a sweep over noise fractions, use run_abstention_sweep.sh
+3) To collect metrics (abstention, cluster abstention), run make_csv.py
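+
+For reference, one end-to-end pass might look like the following sketch. Output file names such as
+threshold_0.9.pkl, cf_class_0_cluster0.pkl, and the noise_data/*.pkl files are illustrative and depend
+on the threshold, cluster, and scale values you choose; the last command is run from abstention/ as in
+run_abstention_sweep.sh:
+```
+python cf_nb.py
+python threshold.py -d ../nt3.autosave.data.pkl -c cf_redo_all_reformat.pkl -t 0.9 -o threshold_0.9.pkl
+python gen_clusters.py -t_value 0.9 -t threshold_0.9.pkl
+python inject_noise.py -t threshold_0.9.pkl -c1 cf_class_0_cluster0.pkl -c2 cf_class_1_cluster0.pkl -scale 1.0 -r True -d ../nt3.autosave.data.pkl -f cf_failed_inds.pkl -o noise_data
+python nt3_abstention_keras2_cf.py --cf_noise ../noise_data/nt3.data.threshold.scale_1.0_class_0_cluster0.noise_0.5.pkl --output_dir cf_sweep --run_id RUN000 --epochs 100
+```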
diff --git a/Pilot1/NT3/nt3_cf/abstention/make_csv.py b/Pilot1/NT3/nt3_cf/abstention/make_csv.py
new file mode 100644
index 00000000..6ee2d98a
--- /dev/null
+++ b/Pilot1/NT3/nt3_cf/abstention/make_csv.py
@@ -0,0 +1,41 @@
+import pandas as pd
+import pickle
+import argparse
+import glob, os
+from pathlib import Path
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-f", type=str, help="Run folder")
+    parser.add_argument("-c1", type=str, help="cluster 1 name")
+    parser.add_argument("-c2", type=str, help="cluster 2 name")
+    args = parser.parse_args()
+    return args
+
+def main():
+    args = get_args()
+    l1 = []
+    l2 = []
+    runs = glob.glob(args.f + "/EXP000/*/")
+    print(runs)
+    for r in runs:
+        global_data = pd.read_csv(r + "training.log")
+        val_abs = global_data['val_abstention'].iloc[-1]
+        val_abs_acc = global_data['val_abstention_acc'].iloc[-1]
+        cluster_data = pickle.load(open(r + "cluster_trace.pkl", "rb"))
+        polluted_abs = cluster_data['Abs polluted']
+        val_abs_cluster = cluster_data['Abs val cluster']
+        val_abs_acc_cluster = cluster_data['Abs val acc']
+        ratio = float(r[-4:-1])  # noise fraction encoded in the run directory name
+        if args.c1 in r:
+            l1.append([ratio, val_abs, val_abs_acc, val_abs_cluster, val_abs_acc_cluster, polluted_abs])
+        elif args.c2 in r:
+            l2.append([ratio, val_abs, val_abs_acc, val_abs_cluster, val_abs_acc_cluster, polluted_abs])
+
+    df1 = pd.DataFrame(l1, columns=['Noise Fraction', 'Val Abs', 'Val Abs Acc', 'Val Abs Cluster', 'Val Abs Acc Cluster', 'Polluted Abs'])
+    df2 = pd.DataFrame(l2, columns=['Noise Fraction', 'Val Abs', 'Val Abs Acc', 'Val Abs Cluster', 'Val Abs Acc Cluster', 'Polluted Abs'])
+    print(df1)
+    df1.to_csv("cluster_1.csv")
+    df2.to_csv("cluster_2.csv")
+if __name__ == "__main__":
+    main()
diff --git a/Pilot1/NT3/nt3_cf/abstention/run_abstention_sweep.sh b/Pilot1/NT3/nt3_cf/abstention/run_abstention_sweep.sh
new file mode 100755
index 00000000..b7a9f611
--- /dev/null
+++ b/Pilot1/NT3/nt3_cf/abstention/run_abstention_sweep.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+for filename in /vol/ml/shahashka/xai-geom/nt3/nt3.data*; do
+    python nt3_abstention_keras2_cf.py --cf_noise $filename --output_dir cf_sweep_0906 --run_id ${filename:40:21} --epochs 100
+    #cp cf_sweep_0902/EXP000/RUN000/training.log ${filename}_training_0902.log
+done
diff --git a/Pilot1/NT3/nt3_cf/analyze.py b/Pilot1/NT3/nt3_cf/analyze.py
new file mode 100644
index 00000000..6e62783b
--- /dev/null
+++ b/Pilot1/NT3/nt3_cf/analyze.py
@@ -0,0 +1,33 @@
+# Script to analyze perturbations by cluster
+# Plot the perturbations by cluster
+# Plot the perturbation centroids
+
+import os
+import pickle
+import matplotlib.pyplot as plt
+import numpy as np
+directory = 'clusters_0911_0.5/'
+orig_dataset = pickle.load(open("nt3.autosave.data.pkl", 'rb'))[0]
+cf_dataset = pickle.load(open("threshold_0905.pkl", 'rb'))['perturbation vector']
+for filename in os.listdir(directory):
+    if filename.startswith("cf_class_0") or filename.startswith("cf_class_1"):
+        data = pickle.load(open(os.path.join(directory, filename), 'rb'))
+        x_range = np.arange(len(data['centroid perturb vector']))
+        ind_in_cluster = data['sample indices in this cluster'][0:5]
+        fig, ax = plt.subplots(3, figsize=(20, 15))
+        fig.suptitle("Perturbation Vectors for counterfactual class 1, cluster 1", fontsize=25)
+        for i, ax_i in zip(ind_in_cluster, ax):
+            d = cf_dataset[i]
+            ax_i.plot(x_range, d, label='perturbation vector')
+            ax_i.plot(x_range, data['centroid perturb vector'], label='centroid')
+            #ax_i.axhline(y=0.5*np.max(np.abs(d)), color='r', linestyle='-')
+            #ax_i.axhline(y=-0.5*np.max(np.abs(d)), color='r', linestyle='-')
+            ax_i.axvline(x=9603, color='r', linestyle='-', linewidth=5, alpha=0.3)
+
+            ax_i.set_title("sample {}".format(i))
+            ax_i.legend()
+        fig.supxlabel("Feature index", fontsize=18)
+        plt.savefig("centroids_{}.png".format(filename))
+
+    else:
+        continue
diff --git a/Pilot1/NT3/nt3_cf/cf_nb.py b/Pilot1/NT3/nt3_cf/cf_nb.py
new file mode 100644
index 00000000..2cd187b3
--- /dev/null
+++ b/Pilot1/NT3/nt3_cf/cf_nb.py
@@ -0,0 +1,56 @@
+import tensorflow as tf
+tf.get_logger().setLevel(40)  # suppress deprecation messages
+tf.compat.v1.disable_v2_behavior()  # disable TF2 behaviour as alibi code still relies on TF1 constructs
+from tensorflow.keras.models import Model, load_model
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+from time import time
+from alibi.explainers import CounterFactual, CounterFactualProto
+print('TF version: ', tf.__version__)
+print('Eager execution enabled: ', tf.executing_eagerly())  # False
+print(tf.test.is_gpu_available())
+import pickle
+model_nt3 = tf.keras.models.load_model('../nt3.autosave.model')
+with open('../nt3.autosave.data.pkl', 'rb') as pickle_file:
+    X_train, X_test, Y_train, Y_test = pickle.load(pickle_file)
+
+shape_cf = (1,) + X_train.shape[1:]
+print(shape_cf)
+target_proba = 0.9
+tol = 0.1  # want counterfactuals with p(class) > 0.90
+target_class = 'other'  # any class other than the original prediction will do
+max_iter = 1000
+lam_init = 1e-1
+max_lam_steps = 20
+learning_rate_init = 0.1
+feature_range = (0, 1)
+cf = CounterFactual(model_nt3, shape=shape_cf, target_proba=target_proba, tol=tol,
+                    target_class=target_class, max_iter=max_iter, lam_init=lam_init,
+                    max_lam_steps=max_lam_steps, learning_rate_init=learning_rate_init,
+                    feature_range=feature_range)
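+# Note: the loop below sweeps every sample in the combined train + test array, asks the alibi
+# CounterFactual explainer for a counterfactual, and checkpoints the accumulated results to
+# cf_{i}.pkl every 100 samples; indices whose search fails are written to cf_failed_inds.pkl
+# so that inject_noise.py can account for the missing entries.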
+shape = X_train[0].shape[0] +results=[] +failed_inds = [] +X = np.concatenate([X_train,X_test]) + +for i in np.arange(0,X.shape[0]): + print(i) + x_sample=X[i:i+1] + print(x_sample.shape) + start = time() + try: + explanation = cf.explain(x_sample) + print('Counterfactual prediction: {}, {}'.format(explanation.cf['class'], explanation.cf['proba'])) + print("Actual prediction: {}".format(model_nt3.predict(x_sample))) + results.append([i, explanation.cf['X'],explanation.cf['class'], explanation.cf['proba']]) + test = model_nt3.predict(explanation.cf['X']) + print(test, explanation.cf['proba'], explanation.cf['class']) + except: + print("Failed cf generation") + failed_inds.append(i) + if i%100 == 0 and i is not 0: + pickle.dump(results, open("cf_{}.pkl".format(i), "wb")) + results = [] +pickle.dump(failed_inds, open("cf_failed_inds.pkl", "wb")) diff --git a/Pilot1/NT3/nt3_cf/environment.yml b/Pilot1/NT3/nt3_cf/environment.yml new file mode 100644 index 00000000..669ca25e --- /dev/null +++ b/Pilot1/NT3/nt3_cf/environment.yml @@ -0,0 +1,264 @@ +name: xai-geom-tf +channels: + - anaconda + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=1_gnu + - _tflow_select=2.1.0=gpu + - absl-py=0.11.0=py38h578d9bd_0 + - aiohttp=3.7.3=py38h497a2fe_1 + - anyio=2.0.2=py38h578d9bd_4 + - argon2-cffi=20.1.0=py38h497a2fe_2 + - astor=0.8.1=pyh9f0ad1d_0 + - astunparse=1.6.3=pyhd8ed1ab_0 + - async-timeout=3.0.1=py_1000 + - async_generator=1.10=py_0 + - attrs=20.3.0=pyhd3deb0d_0 + - babel=2.9.0=pyhd3deb0d_0 + - backcall=0.2.0=pyh9f0ad1d_0 + - backports=1.0=py_2 + - backports.functools_lru_cache=1.6.1=py_0 + - bleach=3.3.0=pyh44b312d_0 + - blinker=1.4=py_1 + - brotlipy=0.7.0=py38h497a2fe_1001 + - c-ares=1.17.1=h36c2ea0_0 + - ca-certificates=2020.10.14=0 + - cachetools=4.2.1=pyhd8ed1ab_0 + - certifi=2020.6.20=py38_0 + - cffi=1.14.4=py38ha65f79e_1 + - chardet=3.0.4=py38h924ce5b_1008 + - click=7.1.2=pyh9f0ad1d_0 + - cryptography=3.3.1=py38h2b97feb_1 + - cudatoolkit=10.1.243=h036e899_7 + - cudnn=7.6.5.32=hc0a50b0_1 + - cupti=10.1.168=0 + - cycler=0.10.0=py_2 + - dbus=1.13.6=hfdff14a_1 + - decorator=4.4.2=py_0 + - defusedxml=0.6.0=py_0 + - entrypoints=0.3=pyhd8ed1ab_1003 + - expat=2.2.10=h9c3ff4c_0 + - fontconfig=2.13.1=hba837de_1004 + - freetype=2.10.4=h0708190_1 + - gast=0.3.3=py_0 + - gettext=0.19.8.1=h0b5b191_1005 + - glib=2.66.4=hc4f0c31_2 + - glib-tools=2.66.4=hc4f0c31_2 + - google-auth=1.24.0=pyhd3deb0d_0 + - google-auth-oauthlib=0.4.1=py_2 + - google-pasta=0.2.0=pyh8c360ce_0 + - grpcio=1.35.0=py38hdd6454d_0 + - gst-plugins-base=1.14.5=h0935bb2_2 + - gstreamer=1.18.3=h3560a44_0 + - h5py=2.10.0=nompi_py38h7442b35_105 + - hdf5=1.10.6=nompi_h6a2412b_1114 + - icu=68.1=h58526e2_0 + - idna=2.10=pyh9f0ad1d_0 + - importlib-metadata=3.4.0=py38h578d9bd_0 + - importlib_metadata=3.4.0=hd8ed1ab_0 + - intel-openmp=2020.2=254 + - ipykernel=5.3.4=py38h5ca1d4c_0 + - ipython=7.20.0=py38h81c977d_0 + - ipython_genutils=0.2.0=py_1 + - jedi=0.18.0=py38h578d9bd_2 + - jinja2=2.11.3=pyh44b312d_0 + - jpeg=9d=h36c2ea0_0 + - json5=0.9.5=pyh9f0ad1d_0 + - jsonschema=3.2.0=py_2 + - jupyter_client=6.1.11=pyhd8ed1ab_1 + - jupyter_core=4.7.1=py38h578d9bd_0 + - jupyter_server=1.2.3=py38h578d9bd_1 + - jupyterlab=3.0.6=pyhd8ed1ab_0 + - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 + - jupyterlab_server=2.1.3=pyhd8ed1ab_0 + - keras-preprocessing=1.1.2=pyhd8ed1ab_0 + - kiwisolver=1.3.1=py38h1fd1430_1 + - krb5=1.17.2=h926e7f8_0 + - lcms2=2.11=hcbb858e_1 + - ld_impl_linux-64=2.35.1=hea4e1c9_2 + - 
libblas=3.9.0=7_openblas + - libcblas=3.9.0=7_openblas + - libclang=11.0.1=default_ha53f305_1 + - libcurl=7.71.1=hcdd3856_8 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libevent=2.1.10=hcdb4288_3 + - libffi=3.3=h58526e2_2 + - libgcc-ng=9.3.0=h2828fa1_18 + - libgfortran-ng=9.3.0=hff62375_18 + - libgfortran5=9.3.0=hff62375_18 + - libglib=2.66.4=h748fe8e_2 + - libgomp=9.3.0=h2828fa1_18 + - libiconv=1.16=h516909a_0 + - liblapack=3.9.0=7_openblas + - libllvm11=11.0.1=hf817b99_0 + - libnghttp2=1.43.0=h812cca2_0 + - libopenblas=0.3.12=pthreads_h4812303_1 + - libpng=1.6.37=h21135ba_2 + - libpq=12.3=h255efa7_3 + - libprotobuf=3.14.0=h780b84a_0 + - libsodium=1.0.18=h36c2ea0_1 + - libssh2=1.9.0=hab1572f_5 + - libstdcxx-ng=9.3.0=h6de172a_18 + - libtiff=4.2.0=hdc55705_0 + - libuuid=2.32.1=h7f98852_1000 + - libwebp-base=1.2.0=h7f98852_0 + - libxcb=1.13=h7f98852_1003 + - libxkbcommon=1.0.3=he3ba5ed_0 + - libxml2=2.9.10=h72842e0_3 + - lz4-c=1.9.3=h9c3ff4c_0 + - markdown=3.3.3=pyh9f0ad1d_0 + - markupsafe=1.1.1=py38h497a2fe_3 + - matplotlib=3.3.4=py38h578d9bd_0 + - matplotlib-base=3.3.4=py38h0efea84_0 + - mistune=0.8.4=py38h497a2fe_1003 + - mkl=2020.2=256 + - multidict=5.1.0=py38h497a2fe_1 + - mysql-common=8.0.22=ha770c72_3 + - mysql-libs=8.0.22=h935591d_3 + - nbclassic=0.2.6=pyhd8ed1ab_0 + - nbclient=0.5.1=py_0 + - nbconvert=6.0.7=py38h578d9bd_3 + - nbformat=5.1.2=pyhd8ed1ab_1 + - ncurses=6.2=h58526e2_4 + - nest-asyncio=1.4.3=pyhd8ed1ab_0 + - ninja=1.10.2=h4bd325d_0 + - notebook=6.2.0=py38h578d9bd_0 + - nspr=4.29=h9c3ff4c_1 + - nss=3.61=hb5efdd6_0 + - numpy=1.20.0=py38h18fd61f_0 + - oauthlib=3.0.1=py_0 + - olefile=0.46=pyh9f0ad1d_1 + - openssl=1.1.1i=h7f98852_0 + - opt_einsum=3.3.0=py_0 + - packaging=20.8=pyhd3deb0d_0 + - pandoc=2.11.4=h7f98852_0 + - pandocfilters=1.4.2=py_1 + - parso=0.8.1=pyhd8ed1ab_0 + - pcre=8.44=he1b5a44_0 + - pexpect=4.8.0=pyh9f0ad1d_2 + - pickleshare=0.7.5=py_1003 + - pillow=8.1.0=py38h357d4e7_1 + - pip=21.0.1=pyhd8ed1ab_0 + - prometheus_client=0.9.0=pyhd3deb0d_0 + - prompt-toolkit=3.0.14=pyha770c72_0 + - protobuf=3.14.0=py38h709712a_1 + - pthread-stubs=0.4=h36c2ea0_1001 + - ptyprocess=0.7.0=pyhd3deb0d_0 + - pyasn1=0.4.8=py_0 + - pyasn1-modules=0.2.7=py_0 + - pycparser=2.20=pyh9f0ad1d_2 + - pygments=2.7.4=pyhd8ed1ab_0 + - pyjwt=2.0.1=pyhd8ed1ab_0 + - pyopenssl=20.0.1=pyhd8ed1ab_0 + - pyparsing=2.4.7=pyh9f0ad1d_0 + - pyqt=5.12.3=py38h578d9bd_7 + - pyqt-impl=5.12.3=py38h7400c14_7 + - pyqt5-sip=4.19.18=py38h709712a_7 + - pyqtchart=5.12=py38h7400c14_7 + - pyqtwebengine=5.12.1=py38h7400c14_7 + - pyrsistent=0.17.3=py38h497a2fe_2 + - pysocks=1.7.1=py38h578d9bd_3 + - python=3.8.6=hffdb5ce_5_cpython + - python-dateutil=2.8.1=py_0 + - python_abi=3.8=1_cp38 + - pytz=2021.1=pyhd8ed1ab_0 + - pyzmq=22.0.1=py38h3d7ac18_0 + - qt=5.12.9=h9d6b050_2 + - readline=8.0=he28a2e2_2 + - requests=2.25.1=pyhd3deb0d_0 + - requests-oauthlib=1.3.0=pyh9f0ad1d_0 + - rsa=4.7=pyhd3deb0d_0 + - send2trash=1.5.0=py_0 + - setuptools=49.6.0=py38h578d9bd_3 + - sip=4.19.13=py38he6710b0_0 + - six=1.15.0=pyh9f0ad1d_0 + - sniffio=1.2.0=py38h578d9bd_1 + - sqlite=3.34.0=h74cdb3f_0 + - tensorboard-plugin-wit=1.8.0=pyh44b312d_0 + - tensorflow=2.2.0=gpu_py38hb782248_0 + - tensorflow-base=2.2.0=gpu_py38h83e3d50_0 + - tensorflow-gpu=2.2.0=h0d30ee6_0 + - termcolor=1.1.0=py_2 + - terminado=0.9.2=py38h578d9bd_0 + - testpath=0.4.4=py_0 + - tk=8.6.10=h21135ba_1 + - tornado=6.1=py38h497a2fe_1 + - traitlets=5.0.5=py_0 + - typing-extensions=3.7.4.3=0 + - typing_extensions=3.7.4.3=py_0 + - urllib3=1.26.3=pyhd8ed1ab_0 + - 
wcwidth=0.2.5=pyh9f0ad1d_2 + - webencodings=0.5.1=py_1 + - werkzeug=1.0.1=pyh9f0ad1d_0 + - wheel=0.36.2=pyhd3deb0d_0 + - wrapt=1.12.1=py38h497a2fe_3 + - xorg-libxau=1.0.9=h7f98852_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xz=5.2.5=h516909a_1 + - yarl=1.6.3=py38h497a2fe_1 + - zeromq=4.3.3=h58526e2_3 + - zipp=3.4.0=py_0 + - zlib=1.2.11=h516909a_1010 + - zstd=1.4.8=ha95c52a_1 + - pip: + - alibi==0.5.5 + - altair==4.1.0 + - astropy==4.2 + - beautifulsoup4==4.9.3 + - blis==0.7.4 + - catalogue==2.0.1 + - click-plugins==1.1.1 + - cligj==0.7.1 + - cloudpickle==1.6.0 + - cymem==2.0.5 + - descartes==1.1.0 + - eli5==0.11.0 + - fiona==1.8.18 + - geopandas==0.8.2 + - imageio==2.9.0 + - joblib==1.0.0 + - keras==2.4.3 + - llvmlite==0.35.0 + - munch==2.5.0 + - murmurhash==1.0.5 + - networkx==2.5 + - numba==0.52.0 + - opt-einsum==3.3.0 + - pandas==1.2.1 + - pathy==0.3.4 + - patsy==0.5.1 + - preshed==3.0.5 + - pydantic==1.7.3 + - pyerfa==1.7.1.1 + - pyproj==3.0.0.post1 + - python-graphviz==0.16 + - pywavelets==1.1.1 + - pyyaml==5.4.1 + - scikit-image==0.18.1 + - scikit-learn==0.24.1 + - scipy==1.4.1 + - shap==0.38.1 + - shapely==1.7.1 + - slicer==0.0.7 + - smart-open==3.0.0 + - soupsieve==2.1 + - spacy==3.0.0 + - spacy-legacy==3.0.1 + - spacy-lookups-data==1.0.0 + - srsly==2.4.0 + - statsmodels==0.12.2 + - tabulate==0.8.7 + - tensorboard==2.2.2 + - tensorflow-estimator==2.2.0 + - thinc==8.0.1 + - threadpoolctl==2.1.0 + - tifffile==2021.1.14 + - toolz==0.11.1 + - tqdm==4.56.0 + - typer==0.3.2 + - wasabi==0.8.2 +prefix: /vol/ml/shahashka/anaconda3/envs/xai-geom-tf + diff --git a/Pilot1/NT3/nt3_cf/gen_clusters.py b/Pilot1/NT3/nt3_cf/gen_clusters.py new file mode 100644 index 00000000..2dcd36a7 --- /dev/null +++ b/Pilot1/NT3/nt3_cf/gen_clusters.py @@ -0,0 +1,113 @@ + +import numpy as np +import pickle +import matplotlib.pyplot as plt +from sklearn.cluster import KMeans +from sklearn.decomposition import PCA +from sklearn.metrics import silhouette_score +import argparse + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-t", type=str, help="threshod input file") + parser.add_argument("-t_value", type=float, help="threshold value") + args = parser.parse_args() + return args + +if __name__ == '__main__': + + args = get_args() + + thresholds_9 = pickle.load(open(args.t, 'rb')) + + perturb_vector=thresholds_9['perturbation vector'] + cf_class = thresholds_9['counterfactual class'] + indices = thresholds_9['sample index'] + + # split by class + perturb_vector_0=[] + perturb_vector_1=[] + indices_0 = [] + indices_1 = [] + for i,j,k in zip(perturb_vector, cf_class, indices): + if j==0: + perturb_vector_0.append(i) + indices_0.append(k) + else: + perturb_vector_1.append(i) + indices_1.append(k) + + indices_0 = np.array(indices_0) + indices_1 = np.array(indices_1) + sil = [] + print(len(perturb_vector_0), len(perturb_vector_1)) + kmax = np.min([len(perturb_vector_0), len(perturb_vector_1),10]) + data_2D = PCA(20).fit_transform(perturb_vector_0) + + # dissimilarity would not be defined for a single cluster, thus, minimum number of clusters should be 2 + for k in range(2, kmax + 1): + print(k) + kmeans = KMeans(n_clusters=k).fit(data_2D[:,0:2]) + labels = kmeans.labels_ + sil.append(silhouette_score(data_2D[:,0:2], labels, metric='euclidean')) + #plt.plot(np.arange(2, kmax+1), sil) + #plt.title("Silhouette scores to determine optimal k") + #plt.xlabel("k") + #plt.show() + k = np.argmax(sil) + 2 if len(sil) > 0 else kmax + print(k) + #data_2D = PCA(2).fit_transform(perturb_vector_0) + kmeans_0 = 
KMeans(n_clusters=k).fit(data_2D[:,0:2]) + labels_0 = kmeans_0.labels_ + colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] + for i in range(k): + plt.scatter(data_2D[:,0][labels_0==i], data_2D[:,1][labels_0==i], c=colors[i%len(colors)]) + plt.title("KMeans clusters with 2D PCA") + plt.savefig("CF_0.png") + k0 = k + sil=[] + data_2D = PCA(20).fit_transform(perturb_vector_1) + for k in range(2, kmax + 1): + kmeans = KMeans(n_clusters=k).fit(data_2D[:,0:2])#perturb_vector_1) + labels = kmeans.labels_ + sil.append(silhouette_score(data_2D[:,0:2], labels, metric='euclidean')) + #plt.plot(np.arange(2, kmax+1), sil) + #plt.title("Silhouette scores to determine optimal k") + #plt.xlabel("k") + #plt.show() + k = np.argmax(sil) + 2 if len(sil) > 0 else kmax + print(k) + #data_2D = PCA(2).fit_transform(perturb_vector_1) + kmeans_1 = KMeans(n_clusters=k).fit(data_2D[:,0:2])#perturb_vector_1) + labels_1 = kmeans_1.labels_ + colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k'] + for i in range(k): + plt.scatter(data_2D[:,0][labels_1==i], data_2D[:,1][labels_1==i], c=colors[i%len(colors)]) + plt.title("Perturbation vectors KMeans clusters with 2D PCA") + plt.savefig("CF 1.png") + +for i in range(len(kmeans_0.cluster_centers_)): + diff_0=kmeans_0.cluster_centers_[i] + max_value = np.max(np.abs(diff_0)) + ind_pos = np.where(diff_0 > args.t_value*max_value) + ind_neg = np.where(diff_0 < -1*args.t_value*max_value) + output = {'centroid perturb vector': diff_0, + 'positive threshold indices':ind_pos, + 'negative threshold indices':ind_neg, + 'sample indices in this cluster':indices_0[labels_0==i]} + print(output) + pickle.dump(output, + open("cf_class_0_cluster{}.pkl".format(i), "wb")) + +for i in range(len(kmeans_1.cluster_centers_)): + diff_1=kmeans_1.cluster_centers_[i] + max_value = np.max(np.abs(diff_1)) + ind_pos = np.where(diff_1 > args.t_value*max_value) + ind_neg = np.where(diff_1 < -1*args.t_value*max_value) + output = {'centroid perturb vector': diff_1, + 'positive threshold indices':ind_pos, + 'negative threshold indices':ind_neg, + 'sample indices in this cluster':indices_1[labels_1==i]} + print(output) + pickle.dump(output, + open("cf_class_1_cluster{}.pkl".format(i), "wb")) diff --git a/Pilot1/NT3/nt3_cf/inject_noise.py b/Pilot1/NT3/nt3_cf/inject_noise.py new file mode 100644 index 00000000..b8071579 --- /dev/null +++ b/Pilot1/NT3/nt3_cf/inject_noise.py @@ -0,0 +1,120 @@ +import pickle +import numpy as np +import copy +import argparse +import os +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-t", type=str, help="threshold pickle file") + parser.add_argument("-c1", type=str, help="cluster 1 file") + parser.add_argument("-c2", type=str, help="cluster 2 file") + parser.add_argument("-scale", type=float, help="scale factor for noise injection") + parser.add_argument("-r", type=bool, help="flag to add random noise") + parser.add_argument("-o", type=str, help="folder for output files") + parser.add_argument("-d", type=str, help="nt3 data file") + parser.add_argument("-f", type=str, help="pickle file containing failed cf indices") + args = parser.parse_args() + return args + +# Choose a random set of indices to inject cf noise into +def random_noise(s,scale,size, cluster_inds, args): + X_train, X_test, y_train, y_test = pickle.load(open(args.d, 'rb')) + #X_data, y_data = pickle.load(open(args.d, 'rb')) + #X_data = np.concatenate([X_train, X_test]) + genes = np.random.choice(np.arange(X_train.shape[0]), replace=False, size=size) + noise = np.random.normal(0,1,size) + X_data_noise = 
copy.deepcopy(X_train) + s, _ = s.split(".") + cluster_name = s[3:] + for p in np.arange(0.1,1.0, 0.1): + for i in cluster_inds: + for j in range(size): + X_data_noise[i][genes[j]]+=noise[j] + # Now split back into train test for output + #X_train = X_data_noise[0:(int)(0.8*X_data.shape[0])] + #X_test = X_data_noise[(int)(0.8*X_data.shape[0]):] + pickle.dump([X_data_noise, X_test, y_train, y_test, [], cluster_inds], open("{}/nt3.data.random.scale_{}_{}.noise_{}.pkl".format(args.o,scale,cluster_name,round(p,1)), "wb")) + +def main(): + args = get_args() + isExist = os.path.exists(args.o) + if not isExist: + os.makedirs(args.o) + # For 2 clusters (with sparse injection feature vector) add CF noise to x% of samples + X_train, X_test, y_train, y_test = pickle.load(open(args.d, 'rb')) + print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) + #X_data, y_data = pickle.load(open(args.d, 'rb')) + threshold_dataset = pickle.load(open(args.t, 'rb')) + perturb_dataset = threshold_dataset['perturbation vector'] + + + #combine for easier indexing later + #X_data = np.concatenate([X_train, X_test]) + + #account for failed indices + failed_indices = pickle.load(open(args.f, 'rb'))[0] + failed_indices=[919] + print(failed_indices) + for i in failed_indices: + perturb_dataset.insert(i, np.zeros(X_train.shape[1])) + perturb_dataset = np.array(perturb_dataset) + + _, cf1 = os.path.split(args.c1) + _, cf2 = os.path.split(args.c2) + cluster_files = [cf1, cf2] + perturb_dataset = perturb_dataset[0:X_train.shape[0]] + for i in range(len(cluster_files)): + print(cluster_files[i]) + d = pickle.load(open(cluster_files[i], "rb")) + cluster_inds = d['sample indices in this cluster'] + cluster_inds_noise = list(filter(lambda val: val < 1120, cluster_inds)) + + if args.r: + random_noise(cluster_files[i],args.scale,20, cluster_inds_noise, args) + + # Sweep through percentages + for p in np.arange(0.1,1.0, 0.1): + print("p={}".format(p)) + X_data_noise = copy.deepcopy(X_train) + + #Full cf injection + # Choose x% of the indices to be perturbed + selector = np.random.choice(a=cluster_inds_noise, replace=False, size = (int)(p*len(cluster_inds_noise))) + X_data_noise[selector]-= args.scale*perturb_dataset[selector][:,:,None] + + # Now split back into train test for output + #X_train = X_data_noise[0:(int)(0.8*X_data.shape[0])] + #X_test = X_data_noise[(int)(0.8*X_data.shape[0]):] + + s,_ = cluster_files[i].split(".") + cluster_name = s[3:] + pickle.dump([X_data_noise, X_test, y_train, y_test, selector, cluster_inds], open("{}/nt3.data.scale_{}_{}.noise_{}.pkl".format(args.o, args.scale,cluster_name, round(p,1)), "wb")) + + # Add cf noise only to those indices that passed the threshold value (instead of the full cf profile) + inds = [] + for j in d['positive threshold indices'][0]: + inds.append(j) + for j in d['negative threshold indices'][0]: + inds.append(j) + X_data_noise_2 = copy.deepcopy(X_train) + + all_inds = np.arange(X_train.shape[0]) + for j in all_inds: + if j not in inds: + perturb_dataset[:,j]=0 + X_data_noise_2[selector]-= args.scale*perturb_dataset[selector][:,:,None] + + # Now split back into train test + #X_train = X_data_noise_2[0:(int)(0.8*X_data.shape[0])] + #X_test = X_data_noise_2[(int)(0.8*X_data.shape[0]):] + + pickle.dump([X_data_noise_2, X_test, y_train, y_test, selector, cluster_inds], open("{}/nt3.data.threshold.scale_{}_{}.noise_{}.pkl".format(args.o, args.scale, cluster_name, round(p,1)), "wb")) + +if __name__ == "__main__": + main() + + + + + +# Save dataset file diff --git 
a/Pilot1/NT3/nt3_cf/test_cf_accuracy.py b/Pilot1/NT3/nt3_cf/test_cf_accuracy.py new file mode 100644 index 00000000..af9b5f19 --- /dev/null +++ b/Pilot1/NT3/nt3_cf/test_cf_accuracy.py @@ -0,0 +1,82 @@ +import tensorflow as tf +from tensorflow.keras.models import Model, load_model +import matplotlib.pyplot as plt +import numpy as np +import os +import pickle +import argparse +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-m", type=str, help="model file") + parser.add_argument("-prefix", type=str, help="noise file prefix") + parser.add_argument("-prefix_rand", type=str, help="random noise file prefix") + parser.add_argument("-prefix_rand_cf", type=str, help="random noise along cf indices") + parser.add_argument("-folder", type=str, help="folder path to noise files") + parser.add_argument("-o", type=str, help="name of saved png") + parser.add_argument("-n", type=str, help="name of cluster") + args = parser.parse_args() + return args +def main(): + args = get_args() + model_nt3 = tf.keras.models.load_model(args.m) + + results = [] + for i in np.arange(0.1,1.0, 0.1): + cf_dataset = pickle.load(open("{}_{}.pkl".format(args.prefix, round(i,2)), "rb")) + X_cf_dataset = np.concatenate([cf_dataset[0], cf_dataset[1]]) + y_cf_dataset = np.concatenate([cf_dataset[2], cf_dataset[3]]) + #X_cf_dataset = cf_dataset[0] + #y_cf_dataset = cf_dataset[1] + cluster_inds = cf_dataset[-1] + print(model_nt3.metrics_names) + acc = model_nt3.evaluate(X_cf_dataset, y_cf_dataset) + cluster_acc = model_nt3.evaluate(X_cf_dataset[cluster_inds], y_cf_dataset[cluster_inds]) + print(i, acc, cluster_acc) + results.append([acc[1], cluster_acc[1]]) + results = np.array(results) +# plt.plot(np.arange(0.1,1.0,0.1), results[:,0], label="full dataset accuracy with cf pertubation", marker='o') + plt.plot(np.arange(0.1,1.0, 0.1), results[:,1], label="cluster accuracy with cf perturbation", marker='o') + + results = [] + for i in np.arange(0.1,1.0, 0.1): + cf_dataset = pickle.load(open("{}_{}.pkl".format(args.prefix_rand, round(i,2)), "rb")) + X_cf_dataset = np.concatenate([cf_dataset[0], cf_dataset[1]]) + y_cf_dataset = np.concatenate([cf_dataset[2], cf_dataset[3]]) + #X_cf_dataset = cf_dataset[0] + #y_cf_dataset = cf_dataset[1] + cluster_inds = cf_dataset[-1] + print(model_nt3.metrics_names) + acc = model_nt3.evaluate(X_cf_dataset, y_cf_dataset) + cluster_acc = model_nt3.evaluate(X_cf_dataset[cluster_inds], y_cf_dataset[cluster_inds]) + print(i, acc, cluster_acc) + results.append([acc[1], cluster_acc[1]]) + results = np.array(results) +# plt.plot(np.arange(0.1,1.0,0.1), results[:,0], label="full dataset accuracy with Gaussian noise (rand indices)", marker='o') + plt.plot(np.arange(0.1,1.0, 0.1), results[:,1], label="cluster accuracy with Gaussian noise (random features)", marker='o') + + results = [] + for i in np.arange(0.1,1.0, 0.1): + cf_dataset = pickle.load(open("{}_{}.pkl".format(args.prefix_rand_cf, round(i,2)), "rb")) + X_cf_dataset = np.concatenate([cf_dataset[0], cf_dataset[1]]) + y_cf_dataset = np.concatenate([cf_dataset[2], cf_dataset[3]]) + #X_cf_dataset = cf_dataset[0] + #y_cf_dataset = cf_dataset[1] + cluster_inds = cf_dataset[-1] + print(model_nt3.metrics_names) + acc = model_nt3.evaluate(X_cf_dataset, y_cf_dataset) + cluster_acc = model_nt3.evaluate(X_cf_dataset[cluster_inds], y_cf_dataset[cluster_inds]) + print(i, acc, cluster_acc) + results.append([acc[1], cluster_acc[1]]) + results = np.array(results) +# plt.plot(np.arange(0.1,1.0,0.1), results[:,0], label="full dataset accuracy with 
Gaussian noise (cf indices)", marker='o') + plt.plot(np.arange(0.1,1.0, 0.1), results[:,1], label="cluster accuracy with Gaussian noise (cf features)", marker='o') + + + plt.xlabel("Noise fraction in cluster") + plt.ylabel("Accuracy") + plt.legend() + plt.title("Model accuracy with counterfactual noise injection") + plt.savefig(args.o) + +if __name__ == "__main__": + main() diff --git a/Pilot1/NT3/nt3_cf/threshold.py b/Pilot1/NT3/nt3_cf/threshold.py new file mode 100644 index 00000000..cdd339e6 --- /dev/null +++ b/Pilot1/NT3/nt3_cf/threshold.py @@ -0,0 +1,70 @@ +# Example run python threshold.py -d nt3.autosave.data.pkl -c small_cf.pkl -t 0.2 -o small_threshold.pkl +import pickle +import numpy as np +import argparse + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('-d', type=str, + help='data input file', required=True) + parser.add_argument('-c', type=str, + help='counterfactual input file', required=True) + parser.add_argument('-o', type=str, + help='output file', required=True) + parser.add_argument('-t', type=float, + help='threshold value', required=True) + + args = parser.parse_args() + return args + +def threshold(t_value, X, y, cf): + pos = [] + neg = [] + cf_classes = [] + inds = [] + diffs = [] + for i in range(len(cf)): + test_y = X[i].flatten() + test_cf = cf[i][1].flatten() + + diff = test_y-test_cf + max_value = np.max(np.abs(diff)) + + ind_pos = np.where(diff > t_value*max_value) + ind_neg = np.where(diff < -t_value*max_value) + + cf_class = np.abs(1-np.argmax(y[i])) + + pos.append(ind_pos) + neg.append(ind_neg) + cf_classes.append(cf_class) + inds.append(cf[i][0]) + diffs.append(diff) + + return pos,neg,cf_classes,inds, diffs + +def main(): + args = get_args() + with open(args.d, 'rb') as pickle_file: + X_train,X_test, Y_train,Y_test = pickle.load(pickle_file) + + with open(args.c, 'rb') as pickle_file: + cf = pickle.load(pickle_file) + + X = np.concatenate([X_train,X_test]) + Y = np.concatenate([Y_train, Y_test]) +# X=X_test +# Y=Y_test + pos,neg,cf_classes,inds, diff = threshold(args.t, X, Y, cf) + + # Note that sample index is here to keep track of counterfactuals that succeeded, counterfactuals that failed are not included here + results = {'sample index': inds, + 'positive threshold indices': pos, + 'negative threshold indices':neg, + 'counterfactual class':cf_classes, + 'perturbation vector': diff} + pickle.dump(results, open(args.o, "wb")) + + +if __name__ == "__main__": + main() diff --git a/Pilot1/NT3/nt3_default_model.txt b/Pilot1/NT3/nt3_default_model.txt index 708b7051..c49c7645 100644 --- a/Pilot1/NT3/nt3_default_model.txt +++ b/Pilot1/NT3/nt3_default_model.txt @@ -10,7 +10,7 @@ out_activation = 'softmax' loss = 'categorical_crossentropy' optimizer = 'sgd' metrics = 'accuracy' -epochs = 400 +epochs = 10 batch_size = 20 learning_rate = 0.001 dropout = 0.1 @@ -31,3 +31,4 @@ timeout = 3600 ckpt_restart_mode = 'off' ckpt_save_interval = 0 ckpt_checksum = True +noise_save_cf = True diff --git a/Pilot1/NT3/nt3_noise_model.txt b/Pilot1/NT3/nt3_noise_model.txt index 6c8f1d73..3f2cc9f0 100644 --- a/Pilot1/NT3/nt3_noise_model.txt +++ b/Pilot1/NT3/nt3_noise_model.txt @@ -38,4 +38,5 @@ alpha = 0.3 alpha_scale_factor = 0.8 init_abs_epoch = 5 task_list = 0 -task_names = ['activation_4'] +noise_save_cf = True +task_names = ['activation_4'] \ No newline at end of file diff --git a/Pilot1/NT3/run_abstention_sweep.sh b/Pilot1/NT3/run_abstention_sweep.sh new file mode 100755 index 00000000..7978ecc1 --- /dev/null +++ 
b/Pilot1/NT3/run_abstention_sweep.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+#vals=0.1
+#for filename in /vol/ml/shahashka/temp/Benchmarks/Pilot1/NT3/nt3_cf/noise_both_clusters/nt3.data.threshold.*; do
+#    echo $filename
+#    python nt3_abstention_keras2.py --noise_cf $filename --output_dir cf_sweep_1104 --run_id $(basename $filename) --epochs 100
+#    #cp cf_sweep_0902/EXP000/RUN000/training.log ${filename}_training_0902.log
+#done
+
+for i in $(seq 0 0.1 1); do
+    echo $i
+    for j in $(seq 1 1 5); do
+        python nt3_baseline_keras2.py --label_noise $i --output_dir baseline_label_noise_$i --run_id RUN$j
+    done
+done
diff --git a/common/file_utils.py b/common/file_utils.py
index a1cfdb0b..b11bf90d 100644
--- a/common/file_utils.py
+++ b/common/file_utils.py
@@ -204,7 +204,6 @@ def directory_from_parameters(params, commonroot='Output'):
         String to specify the common folder to store results.
     """
-
     if commonroot in set(['.', './']):  # Same directory --> convert to absolute path
         outdir = os.path.abspath('.')
     else:  # Create path specified
diff --git a/common/parsing_utils.py b/common/parsing_utils.py
index 9d8b68cb..297a0368 100644
--- a/common/parsing_utils.py
+++ b/common/parsing_utils.py
@@ -97,6 +97,17 @@
      'help': 'set the run unique identifier.'}
 ]
 
+noise_conf = [
+    {'name': 'noise_save_cf',
+     'type': bool,
+     'default': False,
+     'help': 'save the model (TensorFlow saved model format) and data (pickle) objects for cf runs'},
+    {'name': 'noise_cf',
+     'type': str,
+     'default': None,
+     'help': 'pickle file to hold dataset with noise already added through counterfactuals'}
+]
+
 logging_conf = [
     {'name': 'verbose',
      'abv': 'v',
@@ -311,7 +322,7 @@
 ]
 
-registered_conf = [basic_conf, input_output_conf, logging_conf, data_preprocess_conf, model_conf, training_conf, cyclic_learning_conf, ckpt_conf]
+registered_conf = [basic_conf, input_output_conf, logging_conf, data_preprocess_conf, model_conf, training_conf, cyclic_learning_conf, ckpt_conf, noise_conf]
 
 
 def extract_keywords(lst_dict, kw):
@@ -375,7 +386,7 @@ class ArgumentStruct:
     or object entries) can be used.
     """
     def __init__(self, **entries):
-        self.__dict__.update(entries)
+        self.__dict__.update(entries) 
 
 
 class ListOfListsAction(argparse.Action):
@@ -562,7 +573,6 @@ def args_overwrite_config(args, config):
     for key in args_dict.keys():
         # try casting here
         params[key] = args_dict[key]
-
     if 'data_type' not in params:
         params['data_type'] = DEFAULT_DATATYPE
     else: