Skip to content

Commit a3fd878

Browse files
authored
Format modules (#374)
* Format modules * Improve test coverage * Format tests * Refactor * Refactor * Improve test coverage * Fix attribute handling for Constraint * Reformat changelog
1 parent e7c4565 commit a3fd878

File tree

88 files changed

+8677
-5932
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

88 files changed

+8677
-5932
lines changed

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ All notable changes to the Databricks Labs Data Generator will be documented in
66
### unreleased
77

88
#### Fixed
9-
* Updated build scripts to use Ubuntu 22.04 to correspond to environment in Databricks runtime
109
* Refactored `DataAnalyzer` and `BasicStockTickerProvider` to comply with ANSI SQL standards
10+
* Refactored `Constraint` to treat `_filterExpression` and `_calculatedFilterExpression` as instance variables
1111
* Removed internal modification of `SparkSession`
1212

1313
#### Changed
14+
* Added type hints for modules and classes
1415
* Changed base Databricks runtime version to DBR 13.3 LTS (based on Apache Spark 3.4.1) - minimum supported version
1516
of Python is now 3.10.12
1617
* Updated build tooling to use [hatch](https://hatch.pypa.io/latest/)
@@ -23,6 +24,7 @@ All notable changes to the Databricks Labs Data Generator will be documented in
2324
#### Added
2425
* Added support for serialization to/from JSON format
2526
* Added Ruff and mypy tooling
27+
* Added `OutputDataset` class and the ability to save a `DataGenerator` to an output table or files
2628

2729

2830
### Version 0.4.0 Hotfix 2

dbldatagen/__init__.py

Lines changed: 60 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,44 @@
2424
"""
2525

2626
from .data_generator import DataGenerator
27-
from .datagen_constants import DEFAULT_RANDOM_SEED, RANDOM_SEED_RANDOM, RANDOM_SEED_FIXED, \
28-
RANDOM_SEED_HASH_FIELD_NAME, MIN_PYTHON_VERSION, MIN_SPARK_VERSION, \
29-
INFER_DATATYPE, SPARK_DEFAULT_PARALLELISM
30-
from .utils import ensure, topologicalSort, mkBoundsList, coalesce_values, \
31-
deprecated, parse_time_interval, DataGenError, split_list_matching_condition, strip_margins, \
32-
json_value_from_path, system_time_millis
27+
from .datagen_constants import (
28+
DEFAULT_RANDOM_SEED,
29+
RANDOM_SEED_RANDOM,
30+
RANDOM_SEED_FIXED,
31+
RANDOM_SEED_HASH_FIELD_NAME,
32+
MIN_PYTHON_VERSION,
33+
MIN_SPARK_VERSION,
34+
INFER_DATATYPE,
35+
SPARK_DEFAULT_PARALLELISM,
36+
)
37+
from .utils import (
38+
ensure,
39+
topologicalSort,
40+
mkBoundsList,
41+
coalesce_values,
42+
deprecated,
43+
parse_time_interval,
44+
DataGenError,
45+
split_list_matching_condition,
46+
strip_margins,
47+
json_value_from_path,
48+
system_time_millis,
49+
)
3350

3451
from ._version import __version__
3552
from .column_generation_spec import ColumnGenerationSpec
3653
from .column_spec_options import ColumnSpecOptions
37-
from .constraints import Constraint, ChainedRelation, LiteralRange, LiteralRelation, NegativeValues, PositiveValues, \
38-
RangedValues, SqlExpr, UniqueCombinations
54+
from .constraints import (
55+
Constraint,
56+
ChainedRelation,
57+
LiteralRange,
58+
LiteralRelation,
59+
NegativeValues,
60+
PositiveValues,
61+
RangedValues,
62+
SqlExpr,
63+
UniqueCombinations,
64+
)
3965
from .data_analyzer import DataAnalyzer
4066
from .schema_parser import SchemaParser
4167
from .daterange import DateRange
@@ -48,24 +74,41 @@
4874
from .html_utils import HtmlUtils
4975
from .datasets_object import Datasets
5076
from .config import OutputDataset
77+
from .datagen_types import ColumnLike
5178

52-
__all__ = ["data_generator", "data_analyzer", "schema_parser", "daterange", "nrange",
53-
"column_generation_spec", "utils", "function_builder",
54-
"spark_singleton", "text_generators", "datarange", "datagen_constants",
55-
"text_generator_plugins", "html_utils", "datasets_object", "constraints", "config"
56-
]
79+
__all__ = [
80+
"data_generator",
81+
"data_analyzer",
82+
"schema_parser",
83+
"daterange",
84+
"nrange",
85+
"column_generation_spec",
86+
"utils",
87+
"function_builder",
88+
"spark_singleton",
89+
"text_generators",
90+
"datarange",
91+
"datagen_constants",
92+
"text_generator_plugins",
93+
"html_utils",
94+
"datasets_object",
95+
"constraints",
96+
"config",
97+
"datagen_types",
98+
]
5799

58100

59101
def python_version_check(python_version_expected):
60102
"""Check against Python version
61103
62-
Allows minimum version to be passed in to facilitate unit testing
104+
Allows minimum version to be passed in to facilitate unit testing
63105
64-
:param python_version_expected: = minimum version of python to support as tuple e.g (3,6)
65-
:return: True if passed
106+
:param python_version_expected: minimum version of Python to support, as a tuple, e.g. (3, 6)
107+
:return: True if passed
66108
67-
"""
109+
"""
68110
import sys
111+
69112
return sys.version_info >= python_version_expected
70113

71114

dbldatagen/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323

2424
def get_version(version: str) -> VersionInfo:
25-
""" Get version info object for library.
25+
"""Get version info object for library.
2626
2727
:param version: version string to parse for version information
2828

0 commit comments

Comments
 (0)