-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
Add ZIP file decompression and TestCompression. #12175
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -158,11 +158,12 @@ class ParserWarning(Warning): | |
information | ||
<http://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_ on | ||
``iterator`` and ``chunksize``. | ||
compression : {'infer', 'gzip', 'bz2', None}, default 'infer' | ||
For on-the-fly decompression of on-disk data. If 'infer', then use gzip or | ||
bz2 if filepath_or_buffer is a string ending in '.gz' or '.bz2', | ||
respectively, and no decompression otherwise. Set to None for no | ||
decompression. | ||
compression : {'gzip', 'bz2', 'zip', 'infer', None}, default 'infer' | ||
For on-the-fly decompression of on-disk data. If 'infer', then use gzip, | ||
bz2 or zip if filepath_or_buffer is a string ending in '.gz', '.bz2' or | ||
'.zip', respectively, and no decompression otherwise. New in 0.18.1: ZIP | ||
compression. If using 'zip', the ZIP file must contain only one data file | ||
to be read in. Set to None for no decompression. | ||
thousands : str, default None | ||
Thousands separator | ||
decimal : str, default '.' | ||
|
@@ -273,6 +274,8 @@ def _read(filepath_or_buffer, kwds): | |
inferred_compression = 'gzip' | ||
elif filepath_or_buffer.endswith('.bz2'): | ||
inferred_compression = 'bz2' | ||
elif filepath_or_buffer.endswith('.zip'): | ||
inferred_compression = 'zip' | ||
else: | ||
inferred_compression = None | ||
else: | ||
|
@@ -1397,6 +1400,25 @@ def _wrap_compressed(f, compression, encoding=None): | |
data = bz2.decompress(f.read()) | ||
f = StringIO(data) | ||
return f | ||
elif compression == 'zip': | ||
|
||
import zipfile | ||
zip_file = zipfile.ZipFile(f) | ||
zip_names = zip_file.namelist() | ||
print('ZIPNAMES' + zip_names) | ||
|
||
if len(zip_names) == 1: | ||
file_name = zip_names.pop() | ||
f = zip_file.open(file_name) | ||
return f | ||
|
||
elif len(zip_names) == 0: | ||
raise ValueError('Corrupted or zero files found in compressed ' | ||
'zip file %s', zip_file.filename) | ||
|
||
else: | ||
raise ValueError('Multiple files found in compressed ' | ||
'zip file %s', str(zip_names)) | ||
|
||
else: | ||
raise ValueError('do not recognize compression method %s' | ||
% compression) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs a versionadded tag here.