Improve test coverage

ghanse · ghanse · commit 9e984eb650c0 · 2025-12-08T15:31:38.000-05:00
diff --git a/tests/test_distributions.py b/tests/test_distributions.py
@@ -486,3 +486,19 @@ def test_exponential_generation_func(self):
 
         assert s2 == pytest.approx(0.10, abs=0.05)
         assert m2 == pytest.approx(0.10, abs=0.05)
+
+    def test_exponential_requires_rate_for_scale(self):
+        """Ensure accessing scale without a rate produces a clear error."""
+        exp = dist.Exponential()
+        with pytest.raises(
+            ValueError, match="Cannot compute value for 'scale'; Missing value for 'rate'"
+        ):
+            _ = exp.scale
+
+    def test_exponential_requires_rate_for_generation(self):
+        """Ensure generating samples without a rate produces a clear error."""
+        exp = dist.Exponential()
+        with pytest.raises(
+            ValueError, match="Cannot compute value for 'scale'; Missing value for 'rate'"
+        ):
+            _ = exp.generateNormalizedDistributionSample()
diff --git a/tests/test_generation_from_data.py b/tests/test_generation_from_data.py
@@ -119,3 +119,26 @@ def test_df_containing_summary(self):
         summary_df = dg.DataAnalyzer(sparkSession=spark, df=df).summarizeToDF()
 
         assert summary_df.count() == 10
+
+    def test_data_analyzer_requires_dataframe(self):
+        """Validate that DataAnalyzer cannot be initialized without a DataFrame."""
+        with pytest.raises(
+            ValueError, match="Argument `df` must be supplied when initializing a `DataAnalyzer`"
+        ):
+            dg.DataAnalyzer()
+
+    def test_add_measure_to_summary_requires_dataframe(self):
+        """Validate that _addMeasureToSummary enforces a non-null dfData argument."""
+        with pytest.raises(
+            ValueError,
+            match="Input DataFrame `dfData` must be supplied when adding measures to a summary",
+        ):
+            dg.DataAnalyzer._addMeasureToSummary("measure_name", dfData=None)
+
+    def test_generator_default_attributes_from_type_requires_datatype(self):
+        """Validate that _generatorDefaultAttributesFromType enforces a DataType instance."""
+        with pytest.raises(
+            ValueError,
+            match=r"Argument 'sqlType' with type .* must be an instance of `pyspark\.sql\.types\.DataType`",
+        ):
+            dg.DataAnalyzer._generatorDefaultAttributesFromType("not-a-sql-type")
diff --git a/tests/test_quick_tests.py b/tests/test_quick_tests.py
@@ -464,6 +464,42 @@ def test_empty_range(self):
         empty_range = NRange()
         assert empty_range.isEmpty()
 
+    def test_nrange_legacy_min_and_minvalue_conflict(self):
+        """Ensure passing both 'minValue' and the legacy 'min' keyword to NRange raises ValueError."""
+        with pytest.raises(ValueError, match="Only one of 'minValue' and legacy 'min' may be specified"):
+            NRange(minValue=0.0, min=1.0)
+
+    def test_nrange_legacy_min_must_be_numeric(self):
+        """Ensure a string passed as the legacy 'min' keyword to NRange raises ValueError."""
+        with pytest.raises(ValueError, match=r"Legacy 'min' argument must be an integer or float\."):
+            NRange(min="not-a-number")
+
+    def test_nrange_unexpected_kwargs_error_message(self):
+        """Ensure an unrecognized keyword (here 'foo') passed to NRange raises ValueError."""
+        with pytest.raises(ValueError, match=r"Unexpected keyword arguments for NRange: .*"):
+            NRange(foo=1)
+
+    def test_nrange_maxvalue_and_until_conflict(self):
+        """Ensure conflicting 'maxValue' and 'until' arguments raise a clear error."""
+        with pytest.raises(ValueError, match="Only one of 'maxValue' or 'until' may be specified."):
+            NRange(maxValue=10, until=20)
+
+    def test_nrange_discrete_range_requires_min_max_step(self):
+        """Ensure getDiscreteRange validates required attributes."""
+        rng = NRange(minValue=0.0, maxValue=10.0)
+        with pytest.raises(
+            ValueError, match="Range must have 'minValue', 'maxValue', and 'step' defined\\."
+        ):
+            _ = rng.getDiscreteRange()
+
+    def test_nrange_discrete_range_step_must_be_non_zero(self):
+        """Ensure getDiscreteRange validates non-zero step."""
+        rng = NRange(minValue=0.0, maxValue=10.0, step=0)
+        with pytest.raises(
+            ValueError, match="Parameter 'step' must be non-zero when computing discrete range\\."
+        ):
+            _ = rng.getDiscreteRange()
+
     def test_reversed_ranges(self):
         testDataSpec = (
             dg.DataGenerator(sparkSession=spark, name="ranged_data", rows=100000, partitions=4)
diff --git a/tests/test_ranged_values_and_dates.py b/tests/test_ranged_values_and_dates.py
@@ -4,6 +4,7 @@
 import pyspark.sql.functions as F
 from pyspark.sql.types import DoubleType, ShortType, LongType, DecimalType, ByteType, DateType
 from pyspark.sql.types import IntegerType, StringType, FloatType, TimestampType
+import pytest
 
 import dbldatagen as dg
 from dbldatagen import DateRange
@@ -1033,3 +1034,21 @@ def test_ranged_data_string5(self):
         s1_expected_values = [f"testing {x:05} >>" for x in [1.5, 1.8, 2.1, 2.4]]
         s1_values = [r[0] for r in results.select("s1").distinct().collect()]
         self.assertSetEqual(set(s1_expected_values), set(s1_values))
+
+
+def test_daterange_parse_interval_requires_value():
+    """Validate that DateRange.parseInterval raises ValueError when called with None."""
+    with pytest.raises(ValueError, match="Parameter 'interval_str' must be specified"):
+        DateRange.parseInterval(None)
+
+
+def test_daterange_compute_date_range_unique_values_positive():
+    """Validate that DateRange.computeDateRange raises ValueError when unique_values is 0."""
+    with pytest.raises(ValueError, match="Parameter 'unique_values' must be a positive integer"):
+        DateRange.computeDateRange(begin=None, end=None, interval="days=1", unique_values=0)
+
+
+def test_daterange_compute_timestamp_range_unique_values_positive():
+    """Validate that DateRange.computeTimestampRange raises ValueError when unique_values is negative."""
+    with pytest.raises(ValueError, match="Parameter 'unique_values' must be a positive integer"):
+        DateRange.computeTimestampRange(begin=None, end=None, interval="days=1", unique_values=-5)
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -219,3 +219,9 @@ def test_json_value_from_path(self, path, jsonData, defaultValue, expectedValue)
     def test_system_time_millis(self):
         curr_time = system_time_millis()
         assert curr_time > 0
+
+    def test_topological_sort_cycle_error_message(self):
+        """Validate that topologicalSort raises a helpful error message for cyclic dependencies."""
+        deps = [("a", {"b"}), ("b", {"a"})]
+        with pytest.raises(ValueError, match="cyclic or missing dependency detected"):
+            topologicalSort(deps)