3 files changed: +23 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ pyvenv.cfg
2525# *.DBF
2626* .pickle
2727* .parquet
28+ .virtual_documents
2829
2930# Byte-compiled / optimized / DLL files
3031__pycache__ /
Original file line number Diff line number Diff line change 11import os
2+ import struct
3+ import logging
24from datetime import datetime
35from pathlib import Path
46
@@ -109,9 +111,12 @@ def dbf_to_parquet(dbf: str, _pbar=None) -> str:
109111 chunk_df = pd .DataFrame (chunk )
110112 table = pa .Table .from_pandas (chunk_df .applymap (decode_column ))
111113 pq .write_to_dataset (table , root_path = str (parquet ))
112- except Exception as exc :
113- parquet .absolute ().unlink ()
114- raise exc
114+ except struct .error as err :
115+ if _pbar :
116+ _pbar .close ()
117+ Path (path ).unlink ()
118+ parquet .rmdir ()
119+ raise err
115120
116121 if _pbar :
117122 _pbar .update (approx_final_size - _pbar .n )
@@ -138,14 +143,16 @@ def str_to_int(string: str):
138143 # spaces as well
139144 if str (string ).replace (" " , "" ).isnumeric ():
140145 return int (string .replace (" " , "" ))
146+ return string
141147
def str_to_date(string: str):
    """Convert a ``YYYYMMDD`` string into a ``datetime.date``.

    Args:
        string: The raw cell value. Only ``str`` values are parsed.

    Returns:
        A ``datetime.date`` when ``string`` is a ``str`` matching
        ``%Y%m%d``; otherwise the input value itself, unchanged.
    """
    # Non-string values are passed through untouched.
    if not isinstance(string, str):
        return string
    try:
        return datetime.strptime(string, "%Y%m%d").date()
    except ValueError:
        # Not a valid YYYYMMDD date: keep the original value rather than
        # silently replacing it with None.
        return string
149156
150157 map_column_func (["DT_NOTIFIC" , "DT_SIN_PRI" ], str_to_date )
151158 map_column_func (["CODMUNRES" , "SEXO" ], str_to_int )
Original file line number Diff line number Diff line change @@ -389,6 +389,16 @@ def line_file_parser(file_line):
389389 finally :
390390 ftp .close ()
391391
392+ upper_names = [n .upper () for n in content ]
393+ to_remove = []
394+ for name in content :
395+ if ".DBF" in name .upper ():
396+ if name .upper ().replace (".DBF" , ".DBC" ) in upper_names :
397+ to_remove .append (name )
398+
399+ for name in to_remove :
400+ del content [name ]
401+
392402 return content
393403
394404
You can’t perform that action at this time.
0 commit comments