Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ All notable changes to the Databricks Labs Data Generator will be documented in
### unreleased

#### Fixed
* Updated build scripts to use Ubuntu 22.04 to correspond to environment in Databricks runtime
* Refactored `DataAnalyzer` and `BasicStockTickerProvider` to comply with ANSI SQL standards
* Refactored `Constraint` to treat `_filterExpression` and `_calculatedFilterExpression` as instance variables
* Removed internal modification of `SparkSession`

#### Changed
* Added type hints for modules and classes
* Changed base Databricks runtime version to DBR 13.3 LTS (based on Apache Spark 3.4.1) - minimum supported version
of Python is now 3.10.12
* Updated build tooling to use [hatch](https://hatch.pypa.io/latest/)
Expand All @@ -23,6 +24,7 @@ All notable changes to the Databricks Labs Data Generator will be documented in
#### Added
* Added support for serialization to/from JSON format
* Added Ruff and mypy tooling
* Added `OutputDataset` class and the ability to save a `DataGenerator` to an output table or files


### Version 0.4.0 Hotfix 2
Expand Down
77 changes: 60 additions & 17 deletions dbldatagen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,44 @@
"""

from .data_generator import DataGenerator
from .datagen_constants import DEFAULT_RANDOM_SEED, RANDOM_SEED_RANDOM, RANDOM_SEED_FIXED, \
RANDOM_SEED_HASH_FIELD_NAME, MIN_PYTHON_VERSION, MIN_SPARK_VERSION, \
INFER_DATATYPE, SPARK_DEFAULT_PARALLELISM
from .utils import ensure, topologicalSort, mkBoundsList, coalesce_values, \
deprecated, parse_time_interval, DataGenError, split_list_matching_condition, strip_margins, \
json_value_from_path, system_time_millis
from .datagen_constants import (
DEFAULT_RANDOM_SEED,
RANDOM_SEED_RANDOM,
RANDOM_SEED_FIXED,
RANDOM_SEED_HASH_FIELD_NAME,
MIN_PYTHON_VERSION,
MIN_SPARK_VERSION,
INFER_DATATYPE,
SPARK_DEFAULT_PARALLELISM,
)
from .utils import (
ensure,
topologicalSort,
mkBoundsList,
coalesce_values,
deprecated,
parse_time_interval,
DataGenError,
split_list_matching_condition,
strip_margins,
json_value_from_path,
system_time_millis,
)

from ._version import __version__
from .column_generation_spec import ColumnGenerationSpec
from .column_spec_options import ColumnSpecOptions
from .constraints import Constraint, ChainedRelation, LiteralRange, LiteralRelation, NegativeValues, PositiveValues, \
RangedValues, SqlExpr, UniqueCombinations
from .constraints import (
Constraint,
ChainedRelation,
LiteralRange,
LiteralRelation,
NegativeValues,
PositiveValues,
RangedValues,
SqlExpr,
UniqueCombinations,
)
from .data_analyzer import DataAnalyzer
from .schema_parser import SchemaParser
from .daterange import DateRange
Expand All @@ -48,24 +74,41 @@
from .html_utils import HtmlUtils
from .datasets_object import Datasets
from .config import OutputDataset
from .datagen_types import ColumnLike

__all__ = ["data_generator", "data_analyzer", "schema_parser", "daterange", "nrange",
"column_generation_spec", "utils", "function_builder",
"spark_singleton", "text_generators", "datarange", "datagen_constants",
"text_generator_plugins", "html_utils", "datasets_object", "constraints", "config"
]
# Names exposed by a star-import of this package (``from dbldatagen import *``).
# The entries are module-level names such as "config" and "datagen_types" rather than
# the individual classes and functions imported above.
# NOTE(review): several entries (e.g. "nrange", "function_builder") are presumably
# submodules whose imports are not visible in this view - confirm they exist.
__all__ = [
"data_generator",
"data_analyzer",
"schema_parser",
"daterange",
"nrange",
"column_generation_spec",
"utils",
"function_builder",
"spark_singleton",
"text_generators",
"datarange",
"datagen_constants",
"text_generator_plugins",
"html_utils",
"datasets_object",
"constraints",
"config",
"datagen_types",
]


def python_version_check(python_version_expected) -> bool:
    """Check the running Python version against a minimum version.

    Allows the minimum version to be passed in to facilitate unit testing.

    :param python_version_expected: minimum version of Python to support, as a tuple
        (or other sequence) of ints, e.g. ``(3, 6)``
    :return: True if the running interpreter's version is at least the expected version,
        False otherwise
    """
    import sys

    # Coerce to a tuple so that a list (or any other sequence) compares cleanly against
    # ``sys.version_info``; comparing a tuple with a list would raise ``TypeError``.
    return sys.version_info >= tuple(python_version_expected)


Expand Down
2 changes: 1 addition & 1 deletion dbldatagen/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@


def get_version(version: str) -> VersionInfo:
""" Get version info object for library.
"""Get version info object for library.

:param version: version string to parse for version information

Expand Down
Loading