Update dice-ml to 0.10.0 (#2145)

gaugup · web-flow · commit af9b099054ab · 2023-06-26T23:18:38.000Z
* Update dice-ml to 0.10.0

Signed-off-by: Gaurav Gupta &lt;gaugup@microsoft.com&gt;

* Fix lint

Signed-off-by: Gaurav Gupta &lt;gaugup@microsoft.com&gt;

* Fix failing test

Signed-off-by: Gaurav Gupta &lt;gaugup@microsoft.com&gt;

---------

Signed-off-by: Gaurav Gupta &lt;gaugup@microsoft.com&gt;
diff --git a/responsibleai/requirements.txt b/responsibleai/requirements.txt
@@ -1,4 +1,4 @@
-dice-ml>=0.9,<0.10
+dice-ml>=0.10,<0.11
 econml>=0.14.1
 statsmodels<0.14.0
 jsonschema
diff --git a/responsibleai/tests/common_utils.py b/responsibleai/tests/common_utils.py
@@ -2,14 +2,11 @@
 # Licensed under the MIT License.
 
 import json
-import os
-import shutil
-import zipfile
-from urllib.request import urlretrieve
 
 import numpy as np
 import pandas as pd
 import pytest
+from dice_ml.utils import helpers
 # Defines common utilities for responsibleai tests
 from sklearn.model_selection import train_test_split
 
@@ -44,191 +41,7 @@ def __init__(self):
         pass
 
     def fetch(self):
-        """
-        Loads adult income dataset from
-        https://archive.ics.uci.edu/ml/datasets/Adult and prepares the
-        data for data analysis based on https://rpubs.com/H_Zhu/235617
-
-        :return adult_data: returns preprocessed adult income dataset.
-        """
-        # TODO: Revert to using load_adult_income_dataset once dice-ml has a
-        # new release with the fix.
-        # Download the adult dataset from
-        # https://archive.ics.uci.edu/static/public/2/adult.zip as a zip folder
-        outdirname = "adult"
-        zipfilename = outdirname + ".zip"
-        urlretrieve(
-            "https://archive.ics.uci.edu/static/public/2/adult.zip",
-            zipfilename,
-        )
-        with zipfile.ZipFile(zipfilename, "r") as unzip:
-            unzip.extractall(outdirname)
-
-        raw_data = np.genfromtxt(
-            outdirname + "/adult.data",
-            delimiter=", ",
-            dtype=str,
-            invalid_raise=False,
-        )
-
-        #  column names from "https://archive.ics.uci.edu/ml/datasets/Adult"
-        column_names = [
-            "age",
-            "workclass",
-            "fnlwgt",
-            "education",
-            "educational-num",
-            "marital-status",
-            "occupation",
-            "relationship",
-            "race",
-            "gender",
-            "capital-gain",
-            "capital-loss",
-            "hours-per-week",
-            "native-country",
-            "income",
-        ]
-
-        adult_data = pd.DataFrame(raw_data, columns=column_names)
-
-        # For more details on how the below transformations are made,
-        # please refer to https://rpubs.com/H_Zhu/235617
-        adult_data = adult_data.astype(
-            {
-                "age": np.int64,
-                "educational-num": np.int64,
-                "hours-per-week": np.int64,
-            }
-        )
-
-        adult_data = adult_data.replace(
-            {
-                "workclass": {
-                    "Without-pay": "Other/Unknown",
-                    "Never-worked": "Other/Unknown",
-                }
-            }
-        )
-        adult_data = adult_data.replace(
-            {
-                "workclass": {
-                    "Federal-gov": "Government",
-                    "State-gov": "Government",
-                    "Local-gov": "Government",
-                }
-            }
-        )
-        adult_data = adult_data.replace(
-            {
-                "workclass": {
-                    "Self-emp-not-inc": "Self-Employed",
-                    "Self-emp-inc": "Self-Employed",
-                }
-            }
-        )
-        adult_data = adult_data.replace(
-            {
-                "workclass": {
-                    "Never-worked": "Self-Employed",
-                    "Without-pay": "Self-Employed",
-                }
-            }
-        )
-        adult_data = adult_data.replace({"workclass": {"?": "Other/Unknown"}})
-
-        adult_data = adult_data.replace(
-            {
-                "occupation": {
-                    "Adm-clerical": "White-Collar",
-                    "Craft-repair": "Blue-Collar",
-                    "Exec-managerial": "White-Collar",
-                    "Farming-fishing": "Blue-Collar",
-                    "Handlers-cleaners": "Blue-Collar",
-                    "Machine-op-inspct": "Blue-Collar",
-                    "Other-service": "Service",
-                    "Priv-house-serv": "Service",
-                    "Prof-specialty": "Professional",
-                    "Protective-serv": "Service",
-                    "Tech-support": "Service",
-                    "Transport-moving": "Blue-Collar",
-                    "Unknown": "Other/Unknown",
-                    "Armed-Forces": "Other/Unknown",
-                    "?": "Other/Unknown",
-                }
-            }
-        )
-
-        adult_data = adult_data.replace(
-            {
-                "marital-status": {
-                    "Married-civ-spouse": "Married",
-                    "Married-AF-spouse": "Married",
-                    "Married-spouse-absent": "Married",
-                    "Never-married": "Single",
-                }
-            }
-        )
-
-        adult_data = adult_data.replace(
-            {
-                "race": {
-                    "Black": "Other",
-                    "Asian-Pac-Islander": "Other",
-                    "Amer-Indian-Eskimo": "Other",
-                }
-            }
-        )
-
-        adult_data = adult_data[
-            [
-                "age",
-                "workclass",
-                "education",
-                "marital-status",
-                "occupation",
-                "race",
-                "gender",
-                "hours-per-week",
-                "income",
-            ]
-        ]
-
-        adult_data = adult_data.replace({"income": {"<=50K": 0, ">50K": 1}})
-
-        adult_data = adult_data.replace(
-            {
-                "education": {
-                    "Assoc-voc": "Assoc",
-                    "Assoc-acdm": "Assoc",
-                    "11th": "School",
-                    "10th": "School",
-                    "7th-8th": "School",
-                    "9th": "School",
-                    "12th": "School",
-                    "5th-6th": "School",
-                    "1st-4th": "School",
-                    "Preschool": "School",
-                }
-            }
-        )
-
-        adult_data = adult_data.rename(
-            columns={
-                "marital-status": "marital_status",
-                "hours-per-week": "hours_per_week",
-            }
-        )
-
-        train, _ = train_test_split(adult_data, test_size=0.2, random_state=17)
-        adult_data = train.reset_index(drop=True)
-
-        # Remove the downloaded dataset
-        if os.path.isdir(outdirname):
-            entire_path = os.path.abspath(outdirname)
-            shutil.rmtree(entire_path)
-
-        return adult_data
+        return helpers.load_adult_income_dataset()
 
 
 def create_adult_income_dataset(create_small_dataset=True):
diff --git a/responsibleai/tests/counterfactual_manager_validator.py b/responsibleai/tests/counterfactual_manager_validator.py
@@ -2,8 +2,6 @@
 # Licensed under the MIT License.
 
 import pytest
-from dice_ml.utils.exception import \
-    UserConfigValidationException as DiceException
 
 from raiutils.exceptions import UserConfigValidationException
 from responsibleai._internal.constants import (CounterfactualManagerKeys,
@@ -112,7 +110,7 @@ def validate_counterfactual(cf_analyzer,
             counterfactual_props=cf_analyzer.counterfactual.list(),
             expected_counterfactuals=2)
     else:
-        with pytest.raises(DiceException):
+        with pytest.raises(UserConfigValidationException):
             cf_analyzer.counterfactual.add(
                 total_CFs=-2,
                 method='random',

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-dice-ml>=0.9,<0.10`
	`1`	`+dice-ml>=0.10,<0.11`
`2`	`2`	`econml>=0.14.1`
`3`	`3`	`statsmodels<0.14.0`
`4`	`4`	`jsonschema`