Skip to content

Commit b4e9c7e

Browse files
rollakariv
authored and committed
Force strings from stream for the infer strings strategy (#115)
* Force strings from stream for the infer strings strategy
* Updated the load strategies test
1 parent 6fb3e68 commit b4e9c7e

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

dataflows/processors/load.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,10 @@ def __init__(self, load_source, name=None, resources=None, strip=True, limit_row
140140
if options['validate']:
141141
cast_strategy = self.CAST_WITH_SCHEMA
142142

143+
# Force strings from stream for the INFER_STRINGS strategy
144+
if infer_strategy == self.INFER_STRINGS:
145+
self.options['force_strings'] = True
146+
143147
self.guesser = {
144148
self.INFER_FULL: None,
145149
self.INFER_PYTHON_TYPES: TypesGuesser,

tests/test_lib.py

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -617,12 +617,30 @@ def test_load_strategies():
617617
'pytypes nothing': out_t + out_s,
618618
'pytypes schema': out_t + out_s,
619619
'pytypes strings': out_s + out_s,
620-
'strings nothing': out_t + out_s,
621-
'strings schema': [] + out_s,
620+
'strings nothing': out_s + out_s,
621+
'strings schema': out_s + out_s,
622622
'strings strings': out_s + out_s
623623
}
624624

625625

626+
def test_load_strategy_infer_strings_from_native_types():
627+
from dataflows import load
628+
629+
flow = Flow(
630+
load(
631+
'data/beatles_age.json',
632+
infer_strategy='strings',
633+
),
634+
)
635+
data, package, stats = flow.results()
636+
assert data == [[
637+
{'age': '18', 'name': 'john'},
638+
{'age': '16', 'name': 'paul'},
639+
{'age': '17', 'name': 'george'},
640+
{'age': '22', 'name': 'ringo'},
641+
]]
642+
643+
626644
def test_load_name_path():
627645
from dataflows import load
628646

0 commit comments

Comments
 (0)