Skip to content

Commit 3cbe7ca

Browse files
authored
Sac 28668 fix transform and schema (#177)
* handle empty arrays and fields that could be either formatted or nested * remove ipdb * bump version and add changelog entry * handle string parsing similar to existing tap-s3 logic * fix syntax error * fix bad tests
1 parent 1e0bccb commit 3cbe7ca

File tree

5 files changed

+33
-13
lines changed

5 files changed

+33
-13
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## 6.2.1
4+
* Fixes json schema generation to not treat numbers as dates
5+
* Fixes json schema generation to handle empty arrays
6+
* Fixes record transformation to handle fields that could be either a formatted string or a nested data structure
7+
* [#177](https://github.com/singer-io/singer-python/pull/177)
8+
39
## 6.2.0
410
* Adds json schema generation [#175](https://github.com/singer-io/singer-python/pull/175)
511

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import subprocess
55

66
setup(name="singer-python",
7-
version='6.2.0',
7+
version='6.2.1',
88
description="Singer.io utility library",
99
author="Stitch",
1010
classifiers=['Programming Language :: Python :: 3 :: Only'],

singer/schema_generation.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,33 @@ def add_observations(acc, path, data):
1818
for key in data:
1919
add_observations(acc, path + ["object", key], data[key])
2020
elif isinstance(data, list):
21+
if len(data) == 0:
22+
add_observations(acc, path + ["array"], None)
2123
for item in data:
2224
add_observations(acc, path + ["array"], item)
2325
elif isinstance(data, str):
24-
# If the string parses as a date, add an observation that its a date
2526
try:
26-
data = dateutil.parser.parse(data)
27-
except (dateutil.parser.ParserError, OverflowError):
28-
data = None
29-
if data:
27+
# If the string parses as an int, add an observation that it's an integer
28+
int(data)
29+
add_observation(acc, path + ["integer"])
30+
return acc
31+
except (ValueError, TypeError):
32+
pass
33+
try:
34+
# If the string parses as a float, add an observation that it's a number
35+
float(data)
36+
add_observation(acc, path + ["number"])
37+
return acc
38+
except (ValueError, TypeError):
39+
pass
40+
try:
41+
# If the string parses as a date, add an observation that it's a date
42+
dateutil.parser.parse(data)
3043
add_observation(acc, path + ["date"])
31-
else:
32-
add_observation(acc, path + ["string"])
33-
44+
return acc
45+
except (dateutil.parser.ParserError, OverflowError):
46+
pass
47+
add_observation(acc, path + ["string"])
3448
elif isinstance(data, bool):
3549
add_observation(acc, path + ["boolean"])
3650
elif isinstance(data, int):

singer/transform.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,13 +266,13 @@ def _transform(self, data, typ, schema, path):
266266
else:
267267
return False, None
268268

269-
elif schema.get("format") == "date-time":
269+
elif typ == "string" and schema.get("format") == "date-time":
270270
data = self._transform_datetime(data)
271271
if data is None:
272272
return False, None
273273

274274
return True, data
275-
elif schema.get("format") == "singer.decimal":
275+
elif typ == "string" and schema.get("format") == "singer.decimal":
276276
if data is None:
277277
return False, None
278278

tests/test_transform.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def test_nested_transform(self):
2525

2626
def test_multi_type_object_transform(self):
2727
schema = {"type": ["null", "object", "string"],
28-
"properties": {"whatever": {"type": "date-time",
28+
"properties": {"whatever": {"type": "string",
2929
"format": "date-time"}}}
3030
data = {"whatever": "2017-01-01"}
3131
expected = {"whatever": "2017-01-01T00:00:00.000000Z"}
@@ -36,7 +36,7 @@ def test_multi_type_object_transform(self):
3636

3737
def test_multi_type_array_transform(self):
3838
schema = {"type": ["null", "array", "integer"],
39-
"items": {"type": "date-time", "format": "date-time"}}
39+
"items": {"type": "string", "format": "date-time"}}
4040
data = ["2017-01-01"]
4141
expected = ["2017-01-01T00:00:00.000000Z"]
4242
self.assertEqual(expected, transform(data, schema))

0 commit comments

Comments
 (0)