Skip to content

Commit b285157

Browse files
gucciwangCalvin WangJoan Aoanan
authored
Hotfix and version bump (#60)
* list of constraints and better tests and vbump * Slack link update Co-authored-by: Calvin Wang <[email protected]> Co-authored-by: Joan Aoanan <[email protected]>
1 parent 4243d29 commit b285157

File tree

9 files changed

+101
-67
lines changed

9 files changed

+101
-67
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ There are 4 main components of Deequ, and they are:
1818
![](imgs/pydeequ_architecture.jpg)
1919

2020
## 🎉 Announcements 🎉
21+
- With PyDeequ v0.1.8, we now officially support Spark 3! Just make sure you have an environment variable `SPARK_VERSION` to specify your Spark version!
2122
- We've released a blogpost on integrating PyDeequ onto AWS leveraging services such as AWS Glue, Athena, and SageMaker! Check it out: [Monitor data quality in your data lake using PyDeequ and AWS Glue](https://aws.amazon.com/blogs/big-data/monitor-data-quality-in-your-data-lake-using-pydeequ-and-aws-glue/).
2223
- Check out the [PyDeequ Release Announcement Blogpost](https://aws.amazon.com/blogs/big-data/testing-data-quality-at-scale-with-pydeequ/) with a tutorial walkthrough of the Amazon Reviews dataset!
23-
- Join the PyDeequ community on [PyDeequ Slack](https://join.slack.com/t/pydeequ/shared_invite/zt-qopmmfgm-ajKRyxx0HgCiK50b9JhAFg) to chat with the devs!
24+
- Join the PyDeequ community on [PyDeequ Slack](https://join.slack.com/t/pydeequ/shared_invite/zt-te6bntpu-yaqPy7bhiN8Lu0NxpZs47Q) to chat with the devs!
2425

2526
## Quickstart
2627

pydeequ/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# ANY KIND, either express or implied. See the License for the specific
1313
# language governing permissions and limitations under the License.
1414
"""Placeholder docstrings"""
15-
__version__ = "0.1.5"
15+
__version__ = "1.0.0"
1616

1717
from pyspark.sql import SparkSession
1818

pydeequ/checks.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,19 @@ def __init__(self, spark_session: SparkSession, level: CheckLevel, description:
100100
for constraint in self.constraints:
101101
self.addConstraint(constraint)
102102

103+
def addConstraints(self, constraints: list):
104+
self.constraints.extend(constraints)
105+
for constraint in constraints:
106+
self._Check = constraint._Check
107+
103108
def addConstraint(self, constraint):
104109
"""
105110
Returns a new Check object with the given constraints added to the constraints list.
106111
:param Constraint constraint: new constraint to be added.
107112
:return: new Check object
108113
"""
109-
raise NotImplementedError("Private factory method for other check methods")
114+
self.constraints.append(constraint)
115+
self._Check = constraint._Check
110116

111117
def addFilterableContstraint(self, creationFunc):
112118
"""Adds a constraint that can subsequently be replaced with a filtered version

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "pydeequ"
3-
version = "0.1.8"
3+
version = "1.0.0"
44
description = "PyDeequ - Unit Tests for Data"
55
authors = ["Calvin Wang <[email protected]>"]
66
maintainers = ["Calvin Wang <[email protected]>"]

tests/test_analyzers.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def test_ApproxCountDistinct(self):
232232
self.assertEqual(self.ApproxCountDistinct("b"), [Row(value=3)])
233233
self.assertEqual(self.ApproxCountDistinct("c"), [Row(value=2)])
234234

235-
@pytest.mark.skip(reason="@unittest.expectedFailure")
235+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
236236
def test_fail_approxCountDistinct(self):
237237
self.assertEqual(self.ApproxCountDistinct("b"), [Row(value=2)])
238238

@@ -257,7 +257,7 @@ def UniqueValueRatio(self, columns, where=None):
257257
self.assertEqual(df_from_json.select("value").collect(), result_df.select("value").collect())
258258
return result_df.select("value").collect()
259259

260-
@pytest.mark.skip(reason="@unittest.expectedFailure")
260+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
261261
def test_fail_approxQuantiles(self):
262262
self.assertEqual(self.ApproxQuantiles("b", [0.2, 0.5, 0.73]), [Row(value=1.5), Row(value=2.0), Row(value=3.0)])
263263

@@ -270,7 +270,7 @@ def test_Completeness(self):
270270
self.assertEqual(self.Completeness("c"), [Row(value=2 / 3)])
271271
self.assertEqual(self.Completeness("a"), [Row(value=1)])
272272

273-
@pytest.mark.skip(reason="@unittest.expectedFailure")
273+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
274274
def test_fail_Completeness(self):
275275
self.assertEqual(self.Completeness("c"), [Row(value=1.0)])
276276

@@ -285,7 +285,7 @@ def test_Correlation(self):
285285
self.assertEqual(self.Correlation("b", "d"), [Row(value=0.0)])
286286
self.assertEqual(self.Correlation("b", "a"), [])
287287

288-
@pytest.mark.skip(reason="@unittest.expectedFailure")
288+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
289289
def test_fail_Correlation(self):
290290
self.assertEqual(self.Correlation("b", "c"), [Row(value=-1.0)])
291291

@@ -294,7 +294,7 @@ def test_CountDistinct(self):
294294
self.assertEqual(self.CountDistinct(["b", "c"]), [Row(value=3.0)])
295295
self.assertEqual(self.CountDistinct(["b", "d"]), [Row(value=3.0)])
296296

297-
@pytest.mark.skip(reason="@unittest.expectedFailure")
297+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
298298
def test_fail_CountDistinct(self):
299299
self.assertEqual(self.CountDistinct("b"), [Row(value=1.0)])
300300

@@ -332,7 +332,7 @@ def test_DataType(self):
332332
],
333333
)
334334

335-
@pytest.mark.skip(reason="@unittest.expectedFailure")
335+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
336336
def test_fail_Datatype(self):
337337
self.assertEqual(
338338
self.Datatype("c"),
@@ -356,7 +356,7 @@ def test_Distinctness(self):
356356
self.assertEqual(self.Distinctness(["b", "c"]), [Row(value=1.0)])
357357
self.assertEqual(self.Distinctness(["b", "d"]), [Row(value=1.0)])
358358

359-
@pytest.mark.skip(reason="@unittest.expectedFailure")
359+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
360360
def test_fail_Distinctness(self):
361361
self.assertEqual(self.Distinctness("b"), [Row(value=0)])
362362

@@ -365,7 +365,7 @@ def test_Entropy(self):
365365
self.assertEqual(self.Entropy("a"), [Row(value=1.0986122886681096)])
366366
self.assertEqual(self.Entropy("c"), [Row(value=0.6931471805599453)])
367367

368-
@pytest.mark.skip(reason="@unittest.expectedFailure")
368+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
369369
def test_fail_Entropy(self):
370370
self.assertEqual(self.Entropy("b"), [Row(value=0)])
371371

@@ -395,7 +395,7 @@ def test_Histogram(self):
395395
],
396396
)
397397

398-
@pytest.mark.skip(reason="@unittest.expectedFailure")
398+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
399399
def test_fail_Histogram(self):
400400
self.assertEqual(
401401
self.Histogram("b"),
@@ -432,7 +432,7 @@ def test_Histogram_maxBins(self):
432432
],
433433
)
434434

435-
@pytest.mark.skip(reason="@unittest.expectedFailure")
435+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
436436
def test_fail_Histogram_maxBins(self):
437437
self.assertEqual(
438438
self.Histogram_maxBins("b"),
@@ -450,46 +450,46 @@ def test_fail_Histogram_maxBins(self):
450450
def test_Maximum(self):
451451
self.assertEqual(self.Maximum("b"), [Row(value=3.0)])
452452

453-
@pytest.mark.skip(reason="@unittest.expectedFailure")
453+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
454454
def test_fail_Maximum(self):
455455
self.assertEqual(self.Maximum("c"), [Row(value=3.0)])
456456

457457
def test_MaxLength(self):
458458
self.assertEqual(self.MaxLength("a"), [Row(value=3.0)])
459459

460-
@pytest.mark.skip(reason="@unittest.expectedFailure")
460+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
461461
def test_fail_MaxLength(self):
462462
self.assertEqual(self.MaxLength("b"), [Row(value=3.0)])
463463

464464
def test_Mean(self):
465465
self.assertEqual(self.Mean("b"), [Row(value=2.0)])
466466
self.assertEqual(self.Mean("c"), [Row(value=11 / 2)])
467467

468-
@pytest.mark.skip(reason="@unittest.expectedFailure")
468+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
469469
def test_fail_Mean(self):
470470
self.assertEqual(self.Mean("b"), [Row(value=3.0)])
471471

472472
def test_Minimum(self):
473473
self.assertEqual(self.Minimum("b"), [Row(value=1.0)])
474474
self.assertEqual(self.Minimum("c"), [Row(value=5.0)])
475475

476-
@pytest.mark.skip(reason="@unittest.expectedFailure")
476+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
477477
def test_fail_Minimum(self):
478478
self.assertEqual(self.Minimum("a"), [Row(value=3.0)])
479479
self.assertEqual(self.Minimum("b"), [Row(value=3.0)])
480480

481481
def test_MinLength(self):
482482
self.assertEqual(self.MinLength("a"), [Row(value=3.0)])
483483

484-
@pytest.mark.skip(reason="@unittest.expectedFailure")
484+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
485485
def test_fail_MinLength(self):
486486
self.assertEqual(self.MinLength("a"), [])
487487

488488
def test_MutualInformation(self):
489489
self.assertEqual(self.MutualInformation(["b", "c"]), [Row(value=0.7324081924454064)])
490490
self.assertEqual(self.MutualInformation(["b", "d"]), [Row(value=0.6365141682948128)])
491491

492-
@pytest.mark.skip(reason="@unittest.expectedFailure")
492+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
493493
def test_fail_MutualInformation(self):
494494
self.assertEqual(self.MutualInformation(["b", "d"]), [])
495495

@@ -511,7 +511,7 @@ def test_Size(self):
511511
result_df_row = result_df.select("value").collect()
512512
self.assertEqual(result_df_row, [Row(value=3.0)])
513513

514-
@pytest.mark.skip(reason="@unittest.expectedFailure")
514+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
515515
def test_fail_Size(self):
516516
result = self.AnalysisRunner.onData(self.df).addAnalyzer(Size()).run()
517517
result_df = result.select("value").collect()
@@ -521,15 +521,15 @@ def test_StandardDeviation(self):
521521
self.assertEqual(self.StandardDeviation("b"), [Row(value=0.816496580927726)])
522522
self.assertEqual(self.StandardDeviation("c"), [Row(value=0.5)])
523523

524-
@pytest.mark.skip(reason="@unittest.expectedFailure")
524+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
525525
def test_fail_StandardDeviation(self):
526526
self.assertEqual(self.StandardDeviation("c"), [Row(value=0.8)])
527527

528528
def test_Sum(self):
529529
self.assertEqual(self.Sum("b"), [Row(value=6.0)])
530530
self.assertEqual(self.Sum("c"), [Row(value=11.0)])
531531

532-
@pytest.mark.skip(reason="@unittest.expectedFailure")
532+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
533533
def test_fail_Sum(self):
534534
self.assertEqual(self.Sum("b"), [Row(value=3.0)])
535535

@@ -538,15 +538,15 @@ def test_Uniqueness(self):
538538
self.assertEqual(self.Uniqueness(["b", "d"]), [Row(value=1.0)])
539539
self.assertEqual(self.Uniqueness(["a", "a"]), [Row(value=1.0)])
540540

541-
@pytest.mark.skip(reason="@unittest.expectedFailure")
541+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
542542
def test_fail_Uniqueness(self):
543543
self.assertEqual(self.Uniqueness(["a", "a"]), [])
544544

545545
def test_UniqueValueRatio(self):
546546
self.assertEqual(self.UniqueValueRatio(["b", "d"]), [Row(value=1.0)])
547547
self.assertEqual(self.UniqueValueRatio(["b"]), [Row(value=1.0)])
548548

549-
@pytest.mark.skip(reason="@unittest.expectedFailure")
549+
@pytest.mark.xfail(reason="@unittest.expectedFailure")
550550
def test_fail_UniqueValueRatio(self):
551551
self.assertEqual(self.UniqueValueRatio(["a", "a"]), [])
552552

tests/test_anomaly_detection.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def HoltWinters(self, analyzer_func, test, df_prev, df_curr=None):
302302
# TODO - Failing bcoz of
303303
# can not implement breeze.stats.DescriptiveStats, because it is not an interface
304304
# (breeze.stats.DescriptiveStats is in unnamed module of loader 'app')
305-
@pytest.mark.skip(reason="TODO: breeze.stats.DescriptiveStats is in unnamed module of loader 'app'")
305+
@pytest.mark.xfail(reason="TODO: breeze.stats.DescriptiveStats is in unnamed module of loader 'app'")
306306
def test_BatchNormalStrategy(self):
307307

308308
# Interval is inclusive, so meet the requirements upper value is up to 9
@@ -363,7 +363,7 @@ def test_OnlineNormalStrategy(self):
363363
# TODO - Fix in deequ - Failing bcoz of
364364
# can not implement breeze.stats.DescriptiveStats, because it is not an interface
365365
# (breeze.stats.DescriptiveStats is in unnamed module of loader 'app')
366-
@pytest.mark.skip(reason="TODO: breeze.stats.DescriptiveStats is in unnamed module of loader 'app'")
366+
@pytest.mark.xfail(reason="TODO: breeze.stats.DescriptiveStats is in unnamed module of loader 'app'")
367367
def test_holtWinters(self):
368368

369369
# must have 15 points of data
@@ -482,7 +482,7 @@ def get_anomalyDetector(self, anomaly):
482482
AnomalyDetector._set_jvm(self._jvm, strategy_jvm)
483483
return AnomalyDetector._anomaly_jvm
484484

485-
@unittest.skip("Not implemented yet!")
485+
@pytest.mark.skip("Not implemented yet!")
486486
def test_anomalyDetector(self):
487487
self.get_anomalyDetector(SimpleThresholdStrategy(1.0, 3.0))
488488

0 commit comments

Comments
 (0)