Skip to content

Commit 9377252

Browse files
author
Paul Santa Clara
authored
Merge pull request #104 from singer-io/feature/parsing-improvements
parsing improvements
2 parents 6118a61 + c3a05e8 commit 9377252

File tree

3 files changed

+71
-5
lines changed

3 files changed

+71
-5
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
'simplejson==3.11.1',
1616
'python-dateutil>=2.6.0',
1717
'backoff==1.8.0',
18+
'ciso8601',
1819
],
1920
extras_require={
2021
'dev': [

singer/messages.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import sys
22

3-
import dateutil.parser
43
import pytz
54
import simplejson as json
5+
import ciso8601
66

77
import singer.utils as u
8+
from .logger import get_logger
9+
LOGGER = get_logger()
810

911
class Message():
1012
'''Base class for messages.'''
@@ -180,13 +182,20 @@ def parse_message(msg):
180182
# lossy conversions. However, this will affect
181183
# very few data points and we have chosen to
182184
# leave conversion as is for now.
183-
obj = json.loads(msg)
185+
obj = json.loads(msg, use_decimal=True)
184186
msg_type = _required_key(obj, 'type')
185187

186188
if msg_type == 'RECORD':
187189
time_extracted = obj.get('time_extracted')
188190
if time_extracted:
189-
time_extracted = dateutil.parser.parse(time_extracted)
191+
try:
192+
time_extracted = ciso8601.parse_datetime(time_extracted)
193+
except:
194+
LOGGER.warning("unable to parse time_extracted with ciso8601 library")
195+
time_extracted = None
196+
197+
198+
# time_extracted = dateutil.parser.parse(time_extracted)
190199
return RecordMessage(stream=_required_key(obj, 'stream'),
191200
record=_required_key(obj, 'record'),
192201
version=obj.get('version'),

tests/test_singer.py

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import unittest
33
import datetime
44
import dateutil
5+
from decimal import Decimal
6+
57

68
class TestSinger(unittest.TestCase):
79
def test_parse_message_record_good(self):
@@ -89,7 +91,6 @@ def test_parse_message_state_missing_value(self):
8991
singer.parse_message('{"type": "STATE"}')
9092

9193
def test_round_trip(self):
92-
9394
record_message = singer.RecordMessage(
9495
record={'name': 'foo'},
9596
stream='users')
@@ -102,7 +103,7 @@ def test_round_trip(self):
102103
'name': {'type': 'string'}}})
103104

104105
state_message = singer.StateMessage(value={'seq': 1})
105-
106+
106107
self.assertEqual(record_message,
107108
singer.parse_message(singer.format_message(record_message)))
108109
self.assertEqual(schema_message,
@@ -124,5 +125,60 @@ def test_write_schema(self):
124125
def test_write_state(self):
125126
singer.write_state({"foo": 1})
126127

128+
class TestParsingNumbers(unittest.TestCase):
    '''Check how numeric JSON values in a RECORD message come out of parse_message.

    Whole numbers are asserted to be ``int``; values with a fractional part
    are asserted to compare equal to the ``Decimal`` built from the same
    digits, so no precision may be lost on the way in.
    '''

    def create_record(self, value):
        '''Parse a RECORD message carrying ``value`` and return the parsed value.

        ``value`` is a string that is spliced verbatim into the JSON text, so
        it must already be a valid JSON literal (for example ``'3.14'``).
        Returns whatever ``singer.parse_message`` stored under the record's
        ``'value'`` key.
        '''
        raw = '{"type": "RECORD", "stream": "test", "record": {"value": ' + value + '}}'
        parsed = singer.parse_message(raw)
        return parsed.record['value']

    def test_parse_int_zero(self):
        value = self.create_record('0')
        # Exact type check: zero must stay an int rather than become a Decimal.
        self.assertEqual(type(value), int)
        self.assertEqual(value, 0)

    def test_parse_regular_decimal(self):
        value = self.create_record('3.14')
        self.assertEqual(Decimal('3.14'), value)

    def test_parse_large_decimal(self):
        value = self.create_record('9999999999999999.9999')
        self.assertEqual(Decimal('9999999999999999.9999'), value)

    def test_parse_small_decimal(self):
        value = self.create_record('-9999999999999999.9999')
        self.assertEqual(Decimal('-9999999999999999.9999'), value)

    def test_parse_absurdly_large_decimal(self):
        # 1024 integer digits followed by 1024 fractional digits.
        value_str = '9' * 1024 + '.' + '9' * 1024
        value = self.create_record(value_str)
        self.assertEqual(Decimal(value_str), value)

    def test_parse_absurdly_large_int(self):
        value_str = '9' * 1024
        value = self.create_record(value_str)
        self.assertEqual(int(value_str), value)
        self.assertEqual(int, type(value))

    def test_parse_bulk_decs(self):
        value_strs = [
            '-9999999999999999.9999999999999999999999',
            '0',
            '9999999999999999.9999999999999999999999',
            '-7187498962233394.3739812942138415666763',
            '9273972760690975.2044306442955715221042',
            '29515565286974.1188802122612813004366',
            '9176089101347578.2596296292040288441238',
            '-8416853039392703.306423225471199148379',
            '1285266411314091.3002668125515694162268',
            '6051872750342125.3812886238958681227336',
            '-1132031605459408.5571559429308939781468',
            '-6387836755056303.0038029604189860431045',
            '4526059300505414',
        ]
        for value_str in value_strs:
            # subTest keeps the loop going after a failure, so every bad
            # value is reported instead of only the first one.
            with self.subTest(value_str=value_str):
                value = self.create_record(value_str)
                self.assertEqual(Decimal(value_str), value)
181+
182+
127183
if __name__ == '__main__':
128184
unittest.main()

0 commit comments

Comments
 (0)