Skip to content

Commit f8e66a2

Browse files
Add DSR Concepts to Fideslang [#94] (#95)
- Add < and > as allowed characters in FidesKey - Override FidesValidationError to be a subclass of ValueError not Exception so Pydantic can pick up and raise as a ValidationError. - Add fides_meta fields to Dataset, Collection, and DatasetField. These largely contain concepts for handling DSRs in fides. - Dataset.fidesctl_meta has been renamed to Dataset.fides_meta. - Add type validation. - Allow both fidesops_meta and fides_meta to be specified on Dataset/Collection/DatasetField for backwards-compatibility but have instantiation move fidesops_meta to fides_meta. Co-authored-by: Andrew Jackson <[email protected]>
1 parent dfb3201 commit f8e66a2

File tree

8 files changed

+576
-35
lines changed

8 files changed

+576
-35
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ The types of changes are:
1616

1717
## [Unreleased](https://github.com/ethyca/fideslang/compare/1.3.1...main)
1818

19+
### Changed
20+
21+
* Moved over DSR concepts into Fideslang. Expanded allowable characters for FidesKey and added additional Dataset validation. [#95](https://github.com/ethyca/fideslang/pull/95)
22+
23+
1924
## [1.3.1](https://github.com/ethyca/fideslang/compare/1.3.0...1.3.1)
2025

2126
### Fixed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ mypy:
8888
pylint:
8989
@$(RUN) pylint src/
9090

91-
pytest:
91+
pytest: build-local
9292
@$(RUN) pytest -x
9393

9494
xenon:

src/fideslang/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
DataSubject,
1919
DataUse,
2020
Evaluation,
21+
FidesMeta,
2122
FidesModel,
23+
FidesDatasetReference,
24+
FidesCollectionKey,
2225
Organization,
2326
Policy,
2427
PolicyRule,

src/fideslang/models.py

Lines changed: 167 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,33 @@
1+
# pylint: disable=too-many-lines
2+
13
"""
24
Contains all of the Fides resources modeled as Pydantic models.
35
"""
46
from __future__ import annotations
57

68
from enum import Enum
7-
from typing import Dict, List, Optional
9+
from typing import Any, Dict, List, Literal, Optional, Union
810
from warnings import warn
911

10-
from pydantic import AnyUrl, BaseModel, Field, HttpUrl, root_validator, validator
12+
from pydantic import (
13+
AnyUrl,
14+
BaseModel,
15+
ConstrainedStr,
16+
Field,
17+
HttpUrl,
18+
PositiveInt,
19+
root_validator,
20+
validator,
21+
)
1122

1223
from fideslang.validation import (
1324
FidesKey,
1425
check_valid_country_code,
1526
matching_parent_key,
1627
no_self_reference,
28+
parse_data_type_string,
1729
sort_list_objects_by_name,
30+
valid_data_type,
1831
)
1932

2033
# Reusable components
@@ -285,23 +298,162 @@ class MyDatasetField(DatasetFieldBase):
285298
)
286299

287300

288-
class DatasetField(DatasetFieldBase):
301+
EdgeDirection = Literal["from", "to"]
302+
303+
304+
class FidesDatasetReference(BaseModel):
    """Reference to a field from another Collection"""

    # Dataset that the referenced field lives in; validated as a FidesKey.
    dataset: FidesKey
    # Referenced field, stored as a plain string (no FidesKey validation here).
    field: str
    # Edge direction, restricted to "from" / "to"; None when not supplied.
    direction: Optional[EdgeDirection] = None
310+
311+
312+
class FidesMeta(BaseModel):
    """Supplementary metadata used by the Fides application for additional features."""

    # Every attribute is optional and resolves to None when omitted.
    references: Optional[List[FidesDatasetReference]] = Field(
        default=None,
        description="Fields that current field references or is referenced by. Used for drawing the edges of a DSR graph.",
    )
    identity: Optional[str] = Field(
        default=None,
        description="The type of the identity data that should be used to query this collection for a DSR.",
    )
    primary_key: Optional[bool] = Field(
        default=None,
        description="Whether the current field can be considered a primary key of the current collection",
    )
    data_type: Optional[str] = Field(
        default=None,
        description="Optionally specify the data type. Fides will attempt to cast values to this type when querying.",
    )
    length: Optional[PositiveInt] = Field(
        default=None,
        description="Optionally specify the allowable field length. Fides will not generate values that exceed this size.",
    )
    return_all_elements: Optional[bool] = Field(
        default=None,
        description="Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False.",
    )
    read_only: Optional[bool] = Field(
        default=None,
        description="Optionally specify if a field is read-only, meaning it can't be updated or deleted.",
    )

    @validator("data_type")
    @classmethod
    def valid_data_type(cls, value: Optional[str]) -> Optional[str]:
        """Check ``data_type`` with the module-level ``valid_data_type`` helper.

        Returns whatever the helper returns; any error it raises propagates.
        """
        # Class-scope names are not visible inside method bodies, so this call
        # resolves to the imported helper rather than to this validator.
        return valid_data_type(value)
343+
344+
345+
class FidesopsMetaBackwardsCompat(BaseModel):
    """Mixin to convert fidesops_meta to fides_meta for backwards compatibility
    as we add DSR concepts to fideslang"""

    def __init__(self, **data: Any) -> None:
        """Fold a legacy ``fidesops_meta`` keyword into ``fides_meta``.

        Both keywords are removed from ``data`` before delegating to the
        parent initializer, so ``fidesops_meta`` never reaches the model as a
        field of its own.

        Args:
            **data: Raw field values for the model being instantiated. The
                values are arbitrary user input, hence ``Any``.
        """
        fidesops_meta = data.pop("fidesops_meta", None)
        fides_meta = data.pop("fides_meta", None)
        super().__init__(
            # `or` means a truthy fides_meta wins; a missing or falsy one
            # (None, empty dict) falls back to the legacy fidesops_meta value.
            fides_meta=fides_meta or fidesops_meta,
            **data,
        )
358+
359+
360+
class DatasetField(DatasetFieldBase, FidesopsMetaBackwardsCompat):
    """
    The DatasetField resource model.

    This resource is nested within a DatasetCollection.
    """

    # Keep this declared ahead of `fields`: validate_object_fields looks up
    # "fides_meta" in `values` when it validates `fields`.
    fides_meta: Optional[FidesMeta] = None

    fields: Optional[List[DatasetField]] = Field(
        description="An optional array of objects that describe hierarchical/nested fields (typically found in NoSQL databases).",
    )

    @validator("fides_meta")
    @classmethod
    def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]:
        """Reject `return_all_elements` on anything that is not an array field.

        A field counts as an array when its `data_type` ends with "[]".
        Missing metadata is passed through untouched.
        """
        if meta_values:
            declared_type = meta_values.data_type
            declared_as_array = bool(declared_type) and declared_type.endswith("[]")
            if meta_values.return_all_elements is not None and not declared_as_array:
                raise ValueError(
                    "The 'return_all_elements' attribute can only be specified on array fields."
                )
        return meta_values

    @validator("fields")
    @classmethod
    def validate_object_fields(  # type: ignore
        cls,
        fields: Optional[List["DatasetField"]],
        values: Dict[str, Any],
    ) -> Optional[List["DatasetField"]]:
        """Cross-check nested sub-fields against the declared data type.

        - When sub-fields are given and a data type is declared, the parsed
          base type must be 'object'.
        - A field that has sub-fields, or whose declared type is exactly
          'object', may not carry data_categories.
        """
        field_name: str = values.get("name")  # type: ignore
        meta = values.get("fides_meta")
        declared_data_type = meta.data_type if meta else None

        if fields and declared_data_type:
            data_type = parse_data_type_string(declared_data_type)[0]
            if data_type != "object":
                raise ValueError(
                    f"The data type '{data_type}' on field '{field_name}' is not compatible with specified sub-fields. Convert to an 'object' field."
                )

        categories = values.get("data_categories")
        object_like = fields or declared_data_type == "object"
        if object_like and categories:
            raise ValueError(
                f"Object field '{field_name}' cannot have specified data_categories. Specify category on sub-field instead"
            )

        return fields
419+
420+
421+
# this is required for the recursive reference in the pydantic model:
422+
DatasetField.update_forward_refs()
423+
424+
425+
class FidesCollectionKey(ConstrainedStr):
    """
    A 'dataset.collection' string whose two dot-separated parts are each
    valid FidesKeys.
    """

    @classmethod
    def validate(cls, value: str) -> str:
        """
        Require exactly two dot-separated parts and run FidesKey validation on
        each (dataset first, then collection). Returns the value unchanged.
        """
        parts = value.split(".")
        if len(parts) != 2:
            raise ValueError(
                "FidesCollection must be specified in the form 'FidesKey.FidesKey'"
            )
        for part in parts:
            FidesKey.validate(part)
        return value
444+
299445

300-
class DatasetCollection(BaseModel):
446+
class CollectionMeta(BaseModel):
    """Collection-level specific annotations used for query traversal"""

    # Optional list of 'dataset.collection' keys; None when not supplied.
    after: Optional[List[FidesCollectionKey]] = None
450+
451+
452+
class DatasetCollection(FidesopsMetaBackwardsCompat):
301453
"""
302454
The DatasetCollection resource model.
303455
304-
This resource is nested witin a Dataset.
456+
This resource is nested within a Dataset.
305457
"""
306458

307459
name: str = name_field
@@ -320,6 +472,8 @@ class DatasetCollection(BaseModel):
320472
description="An array of objects that describe the collection's fields.",
321473
)
322474

475+
fides_meta: Optional[CollectionMeta] = None
476+
323477
_sort_fields: classmethod = validator("fields", allow_reuse=True)(
324478
sort_list_objects_by_name
325479
)
@@ -362,10 +516,11 @@ class DatasetMetadata(BaseModel):
362516
"""
363517

364518
resource_id: Optional[str]
519+
after: Optional[List[FidesKey]]
365520

366521

367-
class Dataset(FidesModel):
368-
"The Dataset resource model."
522+
class Dataset(FidesModel, FidesopsMetaBackwardsCompat):
523+
"""The Dataset resource model."""
369524

370525
meta: Optional[Dict[str, str]] = Field(
371526
description="An optional object that provides additional information about the Dataset. You can structure the object however you like. It can be a simple set of `key: value` properties or a deeply nested hierarchy of objects. How you use the object is up to you: Fides ignores it."
@@ -377,8 +532,8 @@ class Dataset(FidesModel):
377532
default="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified",
378533
description="Array of Data Qualifier resources identified by `fides_key`, that apply to all collections in the Dataset.",
379534
)
380-
fidesctl_meta: Optional[DatasetMetadata] = Field(
381-
description=DatasetMetadata.__doc__,
535+
fides_meta: Optional[DatasetMetadata] = Field(
536+
description=DatasetMetadata.__doc__, default=None
382537
)
383538
joint_controller: Optional[ContactDetails] = Field(
384539
description=ContactDetails.__doc__,
@@ -393,6 +548,7 @@ class Dataset(FidesModel):
393548
collections: List[DatasetCollection] = Field(
394549
description="An array of objects that describe the Dataset's collections.",
395550
)
551+
396552
_sort_collections: classmethod = validator("collections", allow_reuse=True)(
397553
sort_list_objects_by_name
398554
)
@@ -848,7 +1004,8 @@ def deprecate_system_dependencies(cls, value: List[FidesKey]) -> List[FidesKey]:
8481004
return value
8491005

8501006
class Config:
851-
"Class for the System config"
1007+
"""Class for the System config"""
1008+
8521009
use_enum_values = True
8531010

8541011

src/fideslang/validation.py

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,16 @@
33
"""
44

55
import re
6-
from typing import List, Dict, Pattern
6+
from typing import Dict, List, Optional, Pattern, Set, Tuple
77

88
from pydantic import ConstrainedStr
99

1010
from fideslang.default_fixtures import COUNTRY_CODES
1111

12-
1312
VALID_COUNTRY_CODES = [country["alpha3Code"] for country in COUNTRY_CODES]
1413

1514

16-
class FidesValidationError(Exception):
15+
class FidesValidationError(ValueError):
1716
"""Custom exception for when the pydantic ValidationError can't be used."""
1817

1918

@@ -22,13 +21,15 @@ class FidesKey(ConstrainedStr):
2221
A FidesKey type that creates a custom constrained string.
2322
"""
2423

25-
regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.-]+$")
24+
regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.<>-]+$")
2625

2726
@classmethod # This overrides the default method to throw the custom FidesValidationError
2827
def validate(cls, value: str) -> str:
28+
"""Throws ValueError if val is not a valid FidesKey"""
29+
2930
if not cls.regex.match(value):
3031
raise FidesValidationError(
31-
f"FidesKeys must only contain alphanumeric characters, '.', '_' or '-'. Value provided: {value}"
32+
f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}"
3233
)
3334

3435
return value
@@ -73,7 +74,7 @@ def matching_parent_key(value: FidesKey, values: Dict) -> FidesKey:
7374
parent_key_from_fides_key = ".".join(split_fides_key[:-1])
7475
if parent_key_from_fides_key != value:
7576
raise FidesValidationError(
76-
"The parent_key ({0}) does not match the parent parsed ({1}) from the fides_key ({2})!".format(
77+
"The parent_key ({0}) does match the parent parsed ({1}) from the fides_key ({2})!".format(
7778
value, parent_key_from_fides_key, fides_key
7879
)
7980
)
@@ -93,3 +94,46 @@ def check_valid_country_code(country_code_list: List) -> List:
9394
)
9495
)
9596
return country_code_list
97+
98+
99+
def parse_data_type_string(type_string: Optional[str]) -> Tuple[Optional[str], bool]:
    """Split a data type string into its base type and an is-array flag.

    Arrays are written as 'type[]'; everything before the first '[]' is the
    base type.

    e.g.
    - 'string' -> ('string', False)
    - 'string[]' -> ('string', True)
    - None or '' -> (None, False)

    These data_types are for use in DatasetField.fides_meta.
    """
    if not type_string:
        return None, False
    # partition() yields the text before the first "[]" and the separator
    # itself, which is empty when no "[]" is present.
    base_type, array_marker, _ = type_string.partition("[]")
    return base_type, bool(array_marker)
114+
115+
116+
# Data types that Fides is currently configured to handle
DATA_TYPE_NAMES: Set[str] = {
    "string",
    "integer",
    "float",
    "boolean",
    "object_id",
    "object",
}


def is_valid_data_type(type_name: Optional[str]) -> bool:
    """Is this type a valid data type identifier in fides?

    Args:
        type_name: A base type name (without any '[]' suffix), or None when
            no data type was declared.

    Returns:
        True when ``type_name`` is None or is a member of DATA_TYPE_NAMES,
        otherwise False.
    """
    # None is accepted on purpose: an undeclared data type is not an error.
    return type_name is None or type_name in DATA_TYPE_NAMES
130+
131+
132+
def valid_data_type(data_type_str: Optional[str]) -> Optional[str]:
    """Return ``data_type_str`` unchanged if its base type is supported.

    The string is parsed with parse_data_type_string and the base type is
    checked with is_valid_data_type; a ValueError is raised when that check
    fails.
    """
    base_type = parse_data_type_string(data_type_str)[0]
    if is_valid_data_type(base_type):  # type: ignore
        return data_type_str

    raise ValueError(f"The data type {data_type_str} is not supported.")

tests/conftest.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ def resources_dict():
5353
name="Email",
5454
description="User's Email",
5555
path="another.another.path",
56-
data_categories=[
57-
"user.contact.email"
58-
],
56+
data_categories=["user.contact.email"],
5957
data_qualifier="aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified",
6058
),
6159
],

0 commit comments

Comments
 (0)