Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
[![codecov](https://codecov.io/gh/mitchelllisle/sparkdantic/graph/badge.svg?token=O6PPQX4FEX)](https://codecov.io/gh/mitchelllisle/sparkdantic)
[![PyPI version](https://badge.fury.io/py/sparkdantic.svg)](https://badge.fury.io/py/sparkdantic)

> 1️⃣ version: 2.6.0
> 1️⃣ version: 2.6.1

> ✍️ author: Mitchell Lisle

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "sparkdantic"
version = "2.6.0"
version = "2.6.1"
description = "A pydantic -> spark schema library"
authors = ["Mitchell Lisle <[email protected]>"]
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.6.0
current_version = 2.6.1
commit = True
tag = False

Expand Down
2 changes: 1 addition & 1 deletion src/sparkdantic/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '2.6.0'
__version__ = '2.6.1'
__author__ = 'Mitchell Lisle'
__email__ = '[email protected]'

Expand Down
16 changes: 12 additions & 4 deletions src/sparkdantic/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,6 @@ def create_json_spark_schema(
annotation_or_return_type = _get_annotation_or_return_type(info)
field_type = _get_union_type_arg(annotation_or_return_type)

description = getattr(info, 'description', None)
comment = {'comment': description} if description else {}

spark_type: Union[str, Dict[str, Any]]

try:
Expand Down Expand Up @@ -248,7 +245,7 @@ def create_json_spark_schema(
'name': name,
'type': spark_type,
'nullable': nullable,
'metadata': comment,
'metadata': _json_field_metadata(info),
}
fields.append(struct_field)
return {
Expand Down Expand Up @@ -627,3 +624,14 @@ def json_schema_to_ddl(json_schema: Dict[str, Any]) -> str:
field_ddls.append(field_ddl)

return ','.join(field_ddls)


def _json_field_metadata(info: ComputedFieldInfo) -> Dict[str, Any]:
    """Build the ``metadata`` mapping for one field of the JSON Spark schema.

    Args:
        info: Field information object. ``description`` and ``examples`` are
            read with ``getattr`` and a ``None`` default, so an object that
            lacks either attribute is accepted.

    Returns:
        A new dict containing ``'comment'`` (the description) and/or
        ``'examples'`` for whichever of the two attributes is truthy. The
        dict is empty when neither is set.
    """
    metadata: Dict[str, Any] = {}

    description = getattr(info, 'description', None)
    if description:
        metadata['comment'] = description

    # The examples value is stored as-is and need not be a string (e.g. a list
    # of sample values), which is why the mapping's value type is ``Any``.
    examples = getattr(info, 'examples', None)
    if examples:
        metadata['examples'] = examples

    return metadata
53 changes: 0 additions & 53 deletions tests/test_field_descriptions.py

This file was deleted.

88 changes: 88 additions & 0 deletions tests/test_field_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from pyspark.sql.types import StringType, StructField, StructType

from sparkdantic import SparkField, SparkModel


class DescriptionModel(SparkModel):
    """Fixture model covering each combination of field description and examples.

    The tests in this module expect a field's ``description`` to show up as the
    ``comment`` metadata entry and its ``examples`` as the ``examples`` entry
    of the generated Spark schema.
    """

    # Description only.
    field_with_description: str = SparkField(description='This is a test description.')
    # Examples only.
    field_with_examples: str = SparkField(examples=['test'])
    # Both description and examples.
    field_with_description_and_examples: str = SparkField(
        description='testing description', examples=['testing example']
    )

    # Neither description nor examples; the tests expect empty metadata here.
    field_without_metadata: str = SparkField()


def test_spark_schema_contains_field_metadata():
    """The Spark schema carries each field's description and examples as metadata."""
    # Field name -> metadata expected on its StructField, in declaration order
    # (dicts preserve insertion order, so the schema field order is kept).
    metadata_by_field = {
        'field_with_description': {'comment': 'This is a test description.'},
        'field_with_examples': {'examples': ['test']},
        'field_with_description_and_examples': {
            'comment': 'testing description',
            'examples': ['testing example'],
        },
        'field_without_metadata': {},
    }
    wanted = StructType(
        [
            # Every fixture field is a non-nullable string column.
            StructField(field_name, StringType(), False, metadata=field_metadata)
            for field_name, field_metadata in metadata_by_field.items()
        ]
    )

    produced = DescriptionModel.model_spark_schema()
    assert produced == wanted


def test_spark_schema_json_contains_field_metadata():
    """The JSON Spark schema carries each field's description and examples as metadata."""

    def string_field(field_name, field_metadata):
        # Every fixture field is a non-nullable string column; only the name
        # and the metadata differ between entries.
        return {
            'name': field_name,
            'type': 'string',
            'nullable': False,
            'metadata': field_metadata,
        }

    wanted = {
        'type': 'struct',
        'fields': [
            string_field(
                'field_with_description',
                {'comment': 'This is a test description.'},
            ),
            string_field('field_with_examples', {'examples': ['test']}),
            string_field(
                'field_with_description_and_examples',
                {
                    'comment': 'testing description',
                    'examples': ['testing example'],
                },
            ),
            string_field('field_without_metadata', {}),
        ],
    }

    produced = DescriptionModel.model_json_spark_schema()
    assert produced == wanted