Merge branch 'main' of github.com:SDM-TIB/InterpretME into main

yashrajchudasama26 · yashrajchudasama26 · commit efd93395f9c0 · 2022-08-25T16:16:08.000+02:00
diff --git a/InterpretME/classification.py b/InterpretME/classification.py
@@ -260,7 +260,7 @@ def binary_classification(sampled_data, sampled_target, imp_features, cross_vali
             for f in range(important_features_size):
                 important_features.add(X.columns.values[indices[f]])
 
-    data = plot_feature_importance(estimator.feature_importances_, X.columns, model, st)
+    data = plot_feature_importance(estimator.feature_importances_, X.columns)
     results['feature_importance'] = data
 
     # Taking important features
@@ -401,7 +401,7 @@ def multiclass(sampled_data, sampled_target, imp_features, cv, classes, st, lime
             for f in range(important_features_size):
                 important_features.add(X.columns.values[indices[f]])
 
-    data = plot_feature_importance(estimator.feature_importances_, X.columns, model, st)
+    data = plot_feature_importance(estimator.feature_importances_, X.columns)
     results['feature_importance'] = data
 
     # Taking important features
diff --git a/InterpretME/plots.py b/InterpretME/plots.py
@@ -4,7 +4,7 @@
 from validating_models.visualizations.classification import confusion_matrix_decomposition
 
 
-def sampling(results,path):
+def sampling(results, path):
     """Sampling strategy plots.
 
     Parameters
@@ -14,23 +14,17 @@ def sampling(results,path):
     path : str
         Path to save plot results.
 
-    Returns
-    -------
-
     """
-
-    print("########################################################################")
-    print("************************* Sampling strategy ****************************")
-    print("########################################################################")
     autopct = "%.2f"
     val = results['sampling']
-    run = results['run_id']
+    file = path + f"/sampling_{results['run_id']}.png"
+    print("Saving sampling strategy plot to", file)
     val.plot.pie(autopct=autopct)
     plt.title("Sampling Strategy")
-    plt.savefig(path+f'/sampling_{run}.png')
+    plt.savefig(file)
 
 
-def feature_importance(results,path):
+def feature_importance(results, path):
     """
 
     Parameters
@@ -40,15 +34,10 @@ def feature_importance(results,path):
     path : str
         Path to save plot results.
 
-    Returns
-    -------
-
     """
-    print("#####################################################################")
-    print("******************* Feature Importance plot *************************")
-    print("#####################################################################")
     fi_df = results['feature_importance']
-    run = results['run_id']
+    file = path + f"/Feature Importance_{results['run_id']}.png"
+    print("Saving feature importance plot to", file)
     # Define size of bar plot
     plt.figure(figsize=(20, 15))
     # Plot Searborn bar chart
@@ -57,7 +46,7 @@ def feature_importance(results,path):
     plt.title('FEATURE IMPORTANCE')
     plt.xlabel('FEATURE IMPORTANCE')
     plt.ylabel('FEATURE NAMES')
-    plt.savefig(path +f'/Feature Importance_{run}.png')
+    plt.savefig(file)
 
 
 def decision_trees(results, path):
@@ -70,16 +59,12 @@ def decision_trees(results, path):
     path : str
         Path to save plot results.
 
-    Returns
-    -------
-
     """
-    print("#####################################################################")
-    print("*********************** Decision Trees ******************************")
-    print("#####################################################################")
+    file = path + f"/Decision_trees_{results['run_id']}.svg"
+    print("Saving decision trees to", file)
     vis = results['dtree']
-    run = results['run_id']
-    vis.save(path+f'/Decision_tree_{run}.svg')
+    vis.save(file)
+
 
 def constraints_decision_trees(results, path, constraint_num):
     """
@@ -93,13 +78,8 @@ def constraints_decision_trees(results, path, constraint_num):
     constraint_num : list
         Number of constraints for saving plots.
 
-    Returns
-    -------
-
     """
-    print("#########################################################################")
-    print("*************************** Constraints Decision Trees ******************")
-    print("##########################################################################")
+    print("Saving constraints decision trees to", path)
     run = results['run_id']
     checker = results['checker']
     shadow_tree = results['shadow_tree']
@@ -121,11 +101,3 @@ def constraints_decision_trees(results, path, constraint_num):
                 plot = constraint_viz.dtreeviz(shadow_tree, checker, constraints, coverage=True,
                                                non_applicable_counts=non_applicable_counts)
                 plot.save(path + f'/constraints_validation_dtree_{run}.svg')
-
-
-
-
-
-
-
-
diff --git a/LIBRARY.md b/LIBRARY.md
@@ -24,10 +24,10 @@ pipeline(path_config, sampling, cv, imp_features, test_split, model, lime_result
 
 `pipeline()` executes the whole pipeline; including extracting data and metadata from the input KGs, validating SHACL constraints, preprocessing the data and running predictive models.
 InterpretME aims at collecting metadata at each step of pipeline.
-The current version of InterpretME resorts to interpretable surrogate tools like `LIME` [1].
+The current version of InterpretME resorts to interpretable surrogate tools like LIME [1].
 The user can provide a path to store the LIME results.
 Even model performance metrics like accuracy, precision etc. are recorded as metadata.
-The RDF mapping language (`RML`) is used to define mappings for the metadata collected from the predictive pipeline in order to integrate them into the **InterpretME KG**.
+The RDF mapping language (RML) is used to define mappings for the metadata collected from the predictive pipeline in order to integrate them into the **InterpretME KG**.
 The RML mappings are used by the SDM-RDFizer [2], an efficient RML engine for creating knowledge graphs, to semantify the metadata.
 The function `pipeline()` returns results from the pipeline which are used later in traceability of a target entity.
 
@@ -131,4 +131,4 @@ A Python dictionary following the SPARQL protocol with the query result.
 
 [2] E. Iglesias, S. Jozashoori, D. Chaves-Fraga, D. Collarana and M.-E. Vidal. SDM-RDFizer: An RML Interpreter for the Efficient Creation of RDF Knowledge Graphs. In: CIKM ’20:Proceedings of the 29th ACM International Conference on Information & Knowledge Management, ACM, New York, NY,USA, 2020. DOI: [10.1145/3340531.3412881](https://dl.acm.org/doi/pdf/10.1145/3340531.3412881).
 
-[3] P.D. Rohde. DeTrusty v0.6.1, August 2022. DOI: [10.5281/zenodo.6998001](https://doi.org/10.5281/zenodo.6998001).
+[3] P.D. Rohde. DeTrusty v0.6.1, August 2022. DOI: [10.5281/zenodo.6998001](https://doi.org/10.5281/zenodo.6998001).
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,2 +1,2 @@
-include README.md
-include InterpretME/mappings/*.ttl
+include LIBRARY.md
+include InterpretME/mappings/*.ttl
diff --git a/README.md b/README.md
@@ -3,6 +3,11 @@
 [![Latest Release](http://img.shields.io/github/release/SDM-TIB/InterpretME.svg?logo=github)](https://github.com/SDM-TIB/InterpretME/releases)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
 
+[![Python Versions](https://img.shields.io/pypi/pyversions/InterpretME)](https://pypi.org/project/InterpretME)
+[![Package Format](https://img.shields.io/pypi/format/InterpretME)](https://pypi.org/project/InterpretME)
+[![Package Status](https://img.shields.io/pypi/status/InterpretME)](https://pypi.org/project/InterpretME)
+[![Package Version](https://img.shields.io/pypi/v/InterpretME)](https://pypi.org/project/InterpretME)
+
 # InterpretME
 
 ![InterpretME Architecture](https://raw.githubusercontent.com/SDM-TIB/InterpretME/main/images/architecture.png "InterpretME Architecture")
diff --git a/setup.py b/setup.py
@@ -1,12 +1,12 @@
-from setuptools import find_packages, setup
+from setuptools import setup
 
-with open("README.md", "r", encoding="utf8") as fh:
+with open("LIBRARY.md", "r", encoding="utf8") as fh:
     long_description = fh.read()
 
 setup(
     name='InterpretME',
     packages=['InterpretME'],
-    version='1.0.0',
+    version='1.1.0',
     description='An interpretable machine learning pipeline over knowledge graphs',
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -26,21 +26,22 @@
         'Operating System :: OS Independent'
     ],
     python_requires='>=3.8, <3.10',
-    install_requires=['pandas>=1.4.1',
-                      'imbalanced-learn>=0.9.0',
-                      'lime>=0.2.0',
-                      'pydotplus>=2.0.2',
-                      'svglib>=1.2.1',
-                      'colour>=0.1.5',
-                      'matplotlib<=3.3.4',
-                      'rdflib<=6.1.1',
-                      'seaborn>=0.11.2',
-                      'numpy>=1.21.6',
-                      'dtreeviz>=1.3.0',
-                      'python-slugify>=6.0.0',
-                      'requests>=2.27.0',
-                      'rdfizer>=4.5.4',
-                      'Detrusty>=0.6.1',
-                      'validating-models>=0.9.0'
-                      ]
+    install_requires=[
+        'pandas>=1.4.1',
+        'imbalanced-learn>=0.9.0',
+        'lime>=0.2.0',
+        'pydotplus>=2.0.2',
+        'svglib>=1.2.1',
+        'colour>=0.1.5',
+        'matplotlib<=3.3.4',
+        'rdflib<=6.1.1',
+        'seaborn>=0.11.2',
+        'numpy>=1.21.6',
+        'dtreeviz>=1.3.0',
+        'python-slugify>=6.0.0',
+        'requests>=2.27.0',
+        'rdfizer>=4.5.4',
+        'Detrusty>=0.6.1',
+        'validating-models>=0.9.0'
+    ]
 )