Skip to content

Commit 82da723

Browse files
authored
fix(FTP): remove DBF from content if DBC is present (#168)
1 parent d8750ba commit 82da723

File tree

3 files changed

+23
-5
lines changed

3 files changed

+23
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ pyvenv.cfg
2525
# *.DBF
2626
*.pickle
2727
*.parquet
28+
.virtual_documents
2829

2930
# Byte-compiled / optimized / DLL files
3031
__pycache__/

pysus/data/__init__.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
import os
2+
import struct
3+
import logging
24
from datetime import datetime
35
from pathlib import Path
46

@@ -109,9 +111,12 @@ def dbf_to_parquet(dbf: str, _pbar=None) -> str:
109111
chunk_df = pd.DataFrame(chunk)
110112
table = pa.Table.from_pandas(chunk_df.applymap(decode_column))
111113
pq.write_to_dataset(table, root_path=str(parquet))
112-
except Exception as exc:
113-
parquet.absolute().unlink()
114-
raise exc
114+
except struct.error as err:
115+
if _pbar:
116+
_pbar.close()
117+
Path(path).unlink()
118+
parquet.rmdir()
119+
raise err
115120

116121
if _pbar:
117122
_pbar.update(approx_final_size - _pbar.n)
@@ -138,14 +143,16 @@ def str_to_int(string: str):
138143
# spaces as well
139144
if str(string).replace(" ", "").isnumeric():
140145
return int(string.replace(" ", ""))
146+
return string
141147

142148
def str_to_date(string: str):
143149
if isinstance(string, str):
144150
try:
145151
return datetime.strptime(string, "%Y%m%d").date()
146-
except Exception:
152+
except ValueError:
147153
# Ignore errors, bad value
148-
pass
154+
return string
155+
return string
149156

150157
map_column_func(["DT_NOTIFIC", "DT_SIN_PRI"], str_to_date)
151158
map_column_func(["CODMUNRES", "SEXO"], str_to_int)

pysus/ftp/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,16 @@ def line_file_parser(file_line):
389389
finally:
390390
ftp.close()
391391

392+
upper_names = [n.upper() for n in content]
393+
to_remove = []
394+
for name in content:
395+
if ".DBF" in name.upper():
396+
if name.upper().replace(".DBF", ".DBC") in upper_names:
397+
to_remove.append(name)
398+
399+
for name in to_remove:
400+
del content[name]
401+
392402
return content
393403

394404

0 commit comments

Comments
 (0)