Skip to content

Commit 9e984eb

Browse files
committed
Improve test coverage
1 parent 475c1e8 commit 9e984eb

File tree

5 files changed

+100
-0
lines changed

5 files changed

+100
-0
lines changed

tests/test_distributions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,3 +486,19 @@ def test_exponential_generation_func(self):
486486

487487
assert s2 == pytest.approx(0.10, abs=0.05)
488488
assert m2 == pytest.approx(0.10, abs=0.05)
489+
490+
def test_exponential_requires_rate_for_scale(self):
    """Reading ``scale`` on an ``Exponential`` built with no arguments must raise ``ValueError``."""
    expected_message = "Cannot compute value for 'scale'; Missing value for 'rate'"
    rateless = dist.Exponential()

    # The attribute access itself is what should raise; the value is discarded.
    with pytest.raises(ValueError, match=expected_message):
        _ = rateless.scale
497+
498+
def test_exponential_requires_rate_for_generation(self):
    """Sampling from an ``Exponential`` built with no arguments must raise ``ValueError``."""
    expected_message = "Cannot compute value for 'scale'; Missing value for 'rate'"
    rateless = dist.Exponential()

    # The expected error text is the same one asserted for the ``scale`` property.
    with pytest.raises(ValueError, match=expected_message):
        rateless.generateNormalizedDistributionSample()

tests/test_generation_from_data.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,26 @@ def test_df_containing_summary(self):
119119
summary_df = dg.DataAnalyzer(sparkSession=spark, df=df).summarizeToDF()
120120

121121
assert summary_df.count() == 10
122+
123+
def test_data_analyzer_requires_dataframe(self):
    """Constructing ``DataAnalyzer`` with no arguments must raise ``ValueError``."""
    expected_message = "Argument `df` must be supplied when initializing a `DataAnalyzer`"

    with pytest.raises(ValueError, match=expected_message):
        dg.DataAnalyzer()
129+
130+
def test_add_measure_to_summary_requires_dataframe(self):
    """``_addMeasureToSummary`` called with ``dfData=None`` must raise ``ValueError``."""
    expected_message = "Input DataFrame `dfData` must be supplied when adding measures to a summary"

    # The helper is invoked directly on the class; no analyzer instance is built.
    with pytest.raises(ValueError, match=expected_message):
        dg.DataAnalyzer._addMeasureToSummary("measure_name", dfData=None)
137+
138+
def test_generator_default_attributes_from_type_requires_datatype(self):
    """``_generatorDefaultAttributesFromType`` must reject a plain string with ``ValueError``."""
    # ``.*`` stands in for the reported type of the offending argument.
    expected_pattern = r"Argument 'sqlType' with type .* must be an instance of `pyspark\.sql\.types\.DataType`"

    with pytest.raises(ValueError, match=expected_pattern):
        dg.DataAnalyzer._generatorDefaultAttributesFromType("not-a-sql-type")

tests/test_quick_tests.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,42 @@ def test_empty_range(self):
464464
empty_range = NRange()
465465
assert empty_range.isEmpty()
466466

467+
def test_nrange_legacy_min_and_minvalue_conflict(self):
    """Passing both ``minValue`` and the legacy ``min`` keyword must raise ``ValueError``."""
    expected_message = "Only one of 'minValue' and legacy 'min' may be specified"

    with pytest.raises(ValueError, match=expected_message):
        NRange(minValue=0.0, min=1.0)
471+
472+
def test_nrange_legacy_min_must_be_numeric(self):
    """A string passed as the legacy ``min`` keyword must raise ``ValueError``."""
    expected_pattern = r"Legacy 'min' argument must be an integer or float\."

    with pytest.raises(ValueError, match=expected_pattern):
        NRange(min="not-a-number")
476+
477+
def test_nrange_unexpected_kwargs_error_message(self):
    """An unrecognised keyword passed to ``NRange`` must raise ``ValueError``."""
    expected_pattern = r"Unexpected keyword arguments for NRange: .*"

    with pytest.raises(ValueError, match=expected_pattern):
        NRange(foo=1)
481+
482+
def test_nrange_maxvalue_and_until_conflict(self):
    """Passing both ``maxValue`` and ``until`` to ``NRange`` must raise ``ValueError``.

    ``pytest.raises(match=...)`` treats its argument as a regular expression,
    so the trailing full stop is escaped to require a literal ``.`` rather
    than any character, as the sibling NRange tests already do.
    """
    expected_pattern = r"Only one of 'maxValue' or 'until' may be specified\."

    with pytest.raises(ValueError, match=expected_pattern):
        NRange(maxValue=10, until=20)
486+
487+
def test_nrange_discrete_range_requires_min_max_step(self):
    """``getDiscreteRange`` on a range built without ``step`` must raise ``ValueError``."""
    expected_pattern = "Range must have 'minValue', 'maxValue', and 'step' defined\\."
    stepless = NRange(minValue=0.0, maxValue=10.0)

    # Only the call under test sits inside the ``raises`` block.
    with pytest.raises(ValueError, match=expected_pattern):
        stepless.getDiscreteRange()
494+
495+
def test_nrange_discrete_range_step_must_be_non_zero(self):
    """``getDiscreteRange`` on a range built with ``step=0`` must raise ``ValueError``."""
    expected_pattern = "Parameter 'step' must be non-zero when computing discrete range\\."
    zero_step = NRange(minValue=0.0, maxValue=10.0, step=0)

    # Construction happens outside the ``raises`` block, so only the call is checked.
    with pytest.raises(ValueError, match=expected_pattern):
        zero_step.getDiscreteRange()
502+
467503
def test_reversed_ranges(self):
468504
testDataSpec = (
469505
dg.DataGenerator(sparkSession=spark, name="ranged_data", rows=100000, partitions=4)

tests/test_ranged_values_and_dates.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pyspark.sql.functions as F
55
from pyspark.sql.types import DoubleType, ShortType, LongType, DecimalType, ByteType, DateType
66
from pyspark.sql.types import IntegerType, StringType, FloatType, TimestampType
7+
import pytest
78

89
import dbldatagen as dg
910
from dbldatagen import DateRange
@@ -1033,3 +1034,21 @@ def test_ranged_data_string5(self):
10331034
s1_expected_values = [f"testing {x:05} >>" for x in [1.5, 1.8, 2.1, 2.4]]
10341035
s1_values = [r[0] for r in results.select("s1").distinct().collect()]
10351036
self.assertSetEqual(set(s1_expected_values), set(s1_values))
1037+
1038+
1039+
def test_daterange_parse_interval_requires_value():
    """``DateRange.parseInterval(None)`` must raise ``ValueError``."""
    expected_message = "Parameter 'interval_str' must be specified"

    with pytest.raises(ValueError, match=expected_message):
        DateRange.parseInterval(None)
1043+
1044+
1045+
def test_daterange_compute_date_range_unique_values_positive():
    """``DateRange.computeDateRange`` called with ``unique_values=0`` must raise ``ValueError``."""
    expected_message = "Parameter 'unique_values' must be a positive integer"
    call_kwargs = {"begin": None, "end": None, "interval": "days=1", "unique_values": 0}

    with pytest.raises(ValueError, match=expected_message):
        DateRange.computeDateRange(**call_kwargs)
1049+
1050+
1051+
def test_daterange_compute_timestamp_range_unique_values_positive():
    """``DateRange.computeTimestampRange`` called with ``unique_values=-5`` must raise ``ValueError``."""
    expected_message = "Parameter 'unique_values' must be a positive integer"
    call_kwargs = {"begin": None, "end": None, "interval": "days=1", "unique_values": -5}

    with pytest.raises(ValueError, match=expected_message):
        DateRange.computeTimestampRange(**call_kwargs)

tests/test_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,3 +219,9 @@ def test_json_value_from_path(self, path, jsonData, defaultValue, expectedValue)
219219
def test_system_time_millis(self):
    """``system_time_millis`` must return a value greater than zero."""
    assert system_time_millis() > 0
222+
223+
def test_topological_sort_cycle_error_message(self):
    """``topologicalSort`` must raise ``ValueError`` when two entries depend on each other."""
    # "a" lists "b" as a dependency and "b" lists "a".
    mutually_dependent = [("a", {"b"}), ("b", {"a"})]
    expected_message = "cyclic or missing dependency detected"

    with pytest.raises(ValueError, match=expected_message):
        topologicalSort(mutually_dependent)

0 commit comments

Comments
 (0)