Skip to content

Commit 99ba358

Browse files
committed
Add tests for better coverage of NRange
1 parent 5c62dfd commit 99ba358

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

tests/test_options.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,25 @@ def test_random2(self):
218218
colSpec3 = ds.getColumnSpec("code3")
219219
assert colSpec3.random is True
220220

221+
def test_random3(self):
    """Verify that ``random=True`` yields non-ordinal values for every numeric column type.

    Builds a 500-row dataset with one random column per numeric type
    (decimal, float, double, byte, short, integer, long) and checks that
    ordering by each column changes the row order — if the generated values
    were already in row order, random generation did not take effect.
    """
    # will have implied column `id` for ordinal of row
    ds = (
        dg.DataGenerator(sparkSession=spark, name="test_dataset1", rows=500, partitions=1, random=True)
        .withIdOutput()
        .withColumn("val1", "decimal(5,2)", maxValue=20.0, step=0.01, random=True)
        .withColumn("val2", "float", maxValue=20.0, random=True)
        .withColumn("val3", "double", maxValue=20.0, random=True)
        .withColumn("val4", "byte", maxValue=15, random=True)
        .withColumn("val5", "short", maxValue=31, random=True)
        .withColumn("val6", "integer", maxValue=63, random=True)
        .withColumn("val7", "long", maxValue=127, random=True)
    )

    df = ds.build()
    # Hoist the unordered collect out of the loop: it is loop-invariant and
    # each collect() triggers a full Spark job, so the original re-ran it
    # once per column for no benefit.
    baseline_rows = df.collect()
    cols = ["val1", "val2", "val3", "val4", "val5", "val6", "val7"]
    for col in cols:
        assert baseline_rows != df.orderBy(col).collect(), f"Random values were not generated for {col}"
239+
221240
def test_random_multiple_columns(self):
222241
# will have implied column `id` for ordinal of row
223242
ds = (

tests/test_quick_tests.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
from datetime import timedelta, datetime
22

33
import pytest
4-
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType, DateType
4+
from pyspark.sql.types import (
5+
StructType, StructField, IntegerType, StringType, FloatType, DateType, DecimalType, DoubleType, ByteType,
6+
ShortType, LongType
7+
)
8+
59

610
import dbldatagen as dg
711
from dbldatagen import DataGenerator
@@ -403,6 +407,28 @@ def test_basic_prefix(self):
403407
rowCount = formattedDF.count()
404408
assert rowCount == 1000
405409

410+
def test_missing_range_values(self):
    """Check that ``adjustForColumnDatatype`` fills in a missing bound.

    For each numeric Spark type, an NRange constructed without a min (or max)
    should pick up the datatype's limit for that bound after adjustment, and
    its step should default to 1.
    """
    for dtype in (FloatType(), DoubleType(), ByteType(), ShortType(), IntegerType(), LongType()):
        # Natural (min, max) limits for this datatype, looked up once per type.
        type_min, type_max = NRange._getNumericDataTypeRange(dtype)

        missing_min = NRange(maxValue=1.0)
        missing_min.adjustForColumnDatatype(dtype)
        assert missing_min.min == type_min
        assert missing_min.step == 1

        missing_max = NRange(minValue=0.0)
        missing_max.adjustForColumnDatatype(dtype)
        assert missing_max.max == type_max
        assert missing_max.step == 1
421+
422+
def test_range_with_until(self):
    """An NRange built with ``until`` should resolve to minValue 0 and an
    adjusted maxValue (here 101 for until=100) after datatype adjustment."""
    bounded = NRange(step=2, until=100)
    bounded.adjustForColumnDatatype(IntegerType())
    assert bounded.minValue == 0
    assert bounded.maxValue == 101
427+
428+
def test_empty_range(self):
    """An NRange constructed with no arguments must report itself as empty."""
    assert NRange().isEmpty()
431+
406432
def test_reversed_ranges(self):
407433
testDataSpec = (dg.DataGenerator(sparkSession=spark, name="ranged_data", rows=100000,
408434
partitions=4)

0 commit comments

Comments
 (0)