Skip to content

Commit 59bb420

Browse files
committed
add test for file reading
1 parent f13acb1 commit 59bb420

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

tests/integ/test_datatypes.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,3 +602,63 @@ def test_end_to_end_default_precision(session, precision, mock_default_precision
602602
result.schema.fields[2].datatype._precision
603603
== mock_default_precision[LongType]
604604
)
605+
606+
607+
@pytest.mark.skipif(
    "config.getoption('local_testing_mode', default=False)",
    reason="relaxed_types not supported by local testing mode",
)
@pytest.mark.parametrize("massive_number", ["9" * 38, "5" * 19, "7" * 5])
def test_default_precision_read_file(session, massive_number):
    """Read a staged CSV under snowpark-connect-compatible mode and check that
    the inferred integral column keeps the precision of the actual data
    (``len(massive_number)``) rather than a default precision.
    """
    # NOTE(review): the sibling test above indexes the patched mapping with
    # type classes (mock_default_precision[LongType]) while this dict uses
    # string keys — confirm which key type
    # context._integral_type_default_precision actually expects.
    mock_default_precision = {"LongType": 19, "IntegerType": 10}
    with mock.patch.object(
        context, "_is_snowpark_connect_compatible_mode", True
    ), mock.patch.object(
        context, "_integral_type_default_precision", mock_default_precision
    ):
        stage_name = Utils.random_stage_name()
        header = ("BIG_NUM",)
        rows = [(massive_number,)]

        def write_csv(data):
            # Persist one header row plus `data` to a throwaway CSV file and
            # hand back its path; delete=False so it survives the `with`.
            with tempfile.NamedTemporaryFile(
                mode="w+",
                delete=False,
                suffix=".csv",
                newline="",
            ) as tmp:
                csv_writer = csv.writer(tmp)
                csv_writer.writerow(header)
                csv_writer.writerows(data)
                return tmp.name

        local_path = write_csv(rows)

        try:
            Utils.create_stage(session, stage_name, is_temporary=True)
            put_results = session.file.put(
                local_path, f"@{stage_name}", auto_compress=False, overwrite=True
            )

            uploaded = put_results[0].target

            # Infer schema from only the short file
            reader = session.read.options(
                {
                    "INFER_SCHEMA": True,
                    "INFER_SCHEMA_OPTIONS": {"FILES": [uploaded]},
                    "PARSE_HEADER": True,
                    # Only load the short file
                    "PATTERN": f".*{uploaded}",
                }
            )

            # df1 uses constrained types
            df1 = reader.csv(f"@{stage_name}/")
            inferred_type = df1.schema.fields[0].datatype
            assert isinstance(inferred_type, LongType)
            assert inferred_type._precision == len(massive_number)

        finally:
            # Clean up the stage and the local scratch file regardless of outcome.
            Utils.drop_stage(session, stage_name)
            if os.path.exists(local_path):
                os.remove(local_path)

0 commit comments

Comments
 (0)