Skip to content

Commit bb412f4

Browse files
skuttlemanbryantgrayhumbamp123
authored
SAC-28668: update schema generation v6 (#180)
* use `anyOf` when multiple types are found * fix test * Update schema generation and bump version for v6 deploy Co-authored-by: Bryant Gray <[email protected]> Co-authored-by: Andres Pineda <[email protected]> * Grab error list changes from v5 Co-authored-by: Bryant Gray <[email protected]> Co-authored-by: Andres Pineda <[email protected]> * Fix linting error Co-authored-by: Bryant Gray <[email protected]> --------- Co-authored-by: Bryant Gray <[email protected]> Co-authored-by: Andres Pineda <[email protected]>
1 parent 3cbe7ca commit bb412f4

File tree

5 files changed

+37
-34
lines changed

5 files changed

+37
-34
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## 6.2.2
4+
* Updates json schema generation to not emit dates
5+
* Handle multiple schemas with anyOf and emit them in a specific order
6+
* Do not emit error messages when checking multiple schemas and a subsequent schema passes
7+
* [#179](https://github.com/singer-io/singer-python/pull/179)
8+
39
## 6.2.1
410
* Fixes json schema generation to not treat numbers as dates
511
* Fixes json schema generation to handle empty arrays

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import subprocess
55

66
setup(name="singer-python",
7-
version='6.2.1',
7+
version='6.2.2',
88
description="Singer.io utility library",
99
author="Stitch",
1010
classifiers=['Programming Language :: Python :: 3 :: Only'],

singer/schema_generation.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import dateutil.parser
2-
3-
41
def add_observation(acc, path):
52

63
node = acc
@@ -37,13 +34,6 @@ def add_observations(acc, path, data):
3734
return acc
3835
except (ValueError, TypeError):
3936
pass
40-
try:
41-
# If the string parses as a date, add an observation that it's a date
42-
dateutil.parser.parse(data)
43-
add_observation(acc, path + ["date"])
44-
return acc
45-
except (dateutil.parser.ParserError, OverflowError):
46-
pass
4737
add_observation(acc, path + ["string"])
4838
elif isinstance(data, bool):
4939
add_observation(acc, path + ["boolean"])
@@ -59,9 +49,13 @@ def add_observations(acc, path, data):
5949
return acc
6050

6151
def to_json_schema(obs):
62-
result = {'type': ['null']}
52+
types = []
53+
# add schema types in a specific order to anyOf list
54+
for key in ['array', 'object', 'number', 'integer', 'boolean', 'string', 'null']:
55+
if key not in obs:
56+
continue
6357

64-
for key in obs:
58+
result = {'type': ['null']}
6559

6660
if key == 'object':
6761
result['type'] += ['object']
@@ -74,9 +68,6 @@ def to_json_schema(obs):
7468
result['type'] += ['array']
7569
result['items'] = to_json_schema(obs['array'])
7670

77-
elif key == 'date':
78-
result['type'] += ['string']
79-
result['format'] = 'date-time'
8071
elif key == 'string':
8172
result['type'] += ['string']
8273

@@ -97,7 +88,15 @@ def to_json_schema(obs):
9788
else:
9889
raise Exception("Unexpected data type " + key)
9990

100-
return result
91+
types.append(result)
92+
93+
if len(types) == 0:
94+
return {'type': ['null', 'string']}
95+
96+
if len(types) == 1:
97+
return types[0]
98+
99+
return {'anyOf': types}
101100

102101
def generate_schema(records):
103102
obs = {}

singer/transform.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ def _transform_anyof(self, data, schema, path):
185185
success, transformed_data = self.transform_recur(data, subschema, path)
186186
if success:
187187
return success, transformed_data
188+
else:
189+
self.errors.pop()
188190
else: # pylint: disable=useless-else-on-loop
189191
# exhausted all schemas and didn't return, so we failed :-(
190192
self.errors.append(Error(path, data, schema, logging_level=LOGGER.level))

tests/test_schema_generation.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def test_simple_schema(self):
1010
'a': {'type': ['null', 'integer']},
1111
'b': {'type': ['null', 'string']},
1212
'c': {'type': ['null', 'boolean']},
13-
'dt': {'type': ['null', 'string'], 'format': 'date-time'}
13+
'dt': {'type': ['null', 'string']}
1414
}
1515
}
1616
self.assertEqual(expected_schema, generate_schema(records))
@@ -23,24 +23,20 @@ def test_mix_n_match_records_schema(self):
2323
]
2424
expected_schema = {
2525
'type': ['null', 'object'],
26-
'properties': {
27-
'a': {'type': {'null', 'integer', 'string', 'boolean'}},
28-
'b': {'type': ['null', 'string']},
29-
'c': {'type': {'null', 'integer', 'string'}, 'format': 'singer.decimal'},
30-
'd': {
31-
'type': {'null', 'array', 'object'},
32-
'items': {'type': {'null', 'integer', 'string'}},
33-
'properties': {'one': {'type': ['null', 'integer']},
34-
'two': {'type': ['null', 'string']}}
35-
36-
}
37-
}
26+
'properties': {'a': {'anyOf': [{'type': ['null', 'integer']},
27+
{'type': ['null', 'boolean']},
28+
{'type': ['null', 'string']}]},
29+
'b': {'type': ['null', 'string']},
30+
'c': {'anyOf': [{'type': ['null', 'string'], 'format': 'singer.decimal'},
31+
{'type': ['null', 'integer']}]},
32+
'd': {'anyOf': [{'type': ['null', 'array'],
33+
'items': {'anyOf': [{'type': ['null', 'integer']},
34+
{'type': ['null', 'string']}]}},
35+
{'type': ['null', 'object'],
36+
'properties': {'one': {'type': ['null', 'integer']},
37+
'two': {'type': ['null', 'string']}}}]}}
3838
}
3939
actual_schema = generate_schema(records)
40-
actual_schema['properties']['a']['type'] = set(actual_schema['properties']['a']['type'])
41-
actual_schema['properties']['c']['type'] = set(actual_schema['properties']['c']['type'])
42-
actual_schema['properties']['d']['type'] = set(actual_schema['properties']['d']['type'])
43-
actual_schema['properties']['d']['items']['type'] = set(actual_schema['properties']['d']['items']['type'])
4440
self.assertEqual(expected_schema, actual_schema)
4541

4642
def test_nested_structue_schema(self):

0 commit comments

Comments
 (0)