1 change: 1 addition & 0 deletions ci/requirements-3.5.pip
@@ -1,2 +1,3 @@
xarray==0.9.1
pandas-gbq
Contributor: revert

Contributor: this needs to be reverted

moto
1 change: 1 addition & 0 deletions ci/requirements_all.txt
@@ -26,3 +26,4 @@ sqlalchemy
bottleneck
pymysql
Contributor: this is ok

Jinja2
s3fs
2 changes: 1 addition & 1 deletion ci/requirements_dev.txt
@@ -5,4 +5,4 @@ cython
pytest>=3.1.0
pytest-cov
flake8
Contributor: revert this, s3fs is NOT a requirement for dev; we should be robust to not having this installed

moto
s3fs
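A quick illustration of the pattern the reviewer is pointing at (the test name here is hypothetical, not part of the PR): pytest.importorskip turns a missing optional dependency into a skip rather than an error, which is why s3fs does not need to be a hard dev requirement.

import pytest


def test_something_that_needs_s3fs():
    # Hypothetical example: importorskip() skips the test when s3fs is not
    # installed, so a dev environment without it still gets a green run.
    s3fs = pytest.importorskip('s3fs')
    assert hasattr(s3fs, 'S3FileSystem')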
8 changes: 8 additions & 0 deletions pandas/io/json/json.py
@@ -341,12 +341,20 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
            json = filepath_or_buffer
    elif hasattr(filepath_or_buffer, 'read'):
        json = filepath_or_buffer.read()

    else:
        json = filepath_or_buffer

    if lines:
        # If given a json lines file, we break the string into lines, add
        # commas and put it in a json list to make a valid json object.

        """
        If PY3 and/or isinstance(json, bytes)
        """
Contributor: just a 1-line comment is fine;

if PY3 and isinstance(json, bytes):
    ...

        if isinstance(json, bytes):
Contributor: 1 line comment

            json = json.decode('utf-8')

        lines = list(StringIO(json.strip()))
        json = '[' + ','.join(lines) + ']'

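As an aside, the lines=True branch above amounts to a small string transformation; here is a minimal sketch of the same steps outside read_json (using io.StringIO, whereas json.py uses the StringIO from pandas.compat):

from io import StringIO

# Two JSON documents in JSON-lines form, as bytes (e.g. read from S3).
json_bytes = b'{"a": 1, "b": 2}\n{"b": 2, "a": 1}\n'

# Mirror the branch above: decode, split into lines, join with commas,
# and wrap in brackets so the result is one valid JSON array.
json_str = json_bytes.decode('utf-8')
lines = list(StringIO(json_str.strip()))
combined = '[' + ','.join(lines) + ']'
# combined parses as a JSON list of two objects.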
74 changes: 74 additions & 0 deletions pandas/tests/io/conftest.py
@@ -0,0 +1,74 @@
import os

import moto
import pytest
from pandas.io.parsers import read_table

HERE = os.path.dirname(__file__)


@pytest.fixture(scope='module')
def tips_file():
"""Path to the tips dataset"""
return os.path.join(HERE, 'parser', 'data', 'tips.csv')


@pytest.fixture(scope='module')
def jsonl_file():
"""Path a JSONL dataset"""
return os.path.join(HERE, 'parser', 'data', 'items.jsonl')


@pytest.fixture(scope='module')
def salaries_table():
"""DataFrame with the salaries dataset"""
path = os.path.join(HERE, 'parser', 'data', 'salaries.csv')
return read_table(path)


@pytest.fixture(scope='module')
def s3_resource(tips_file, jsonl_file):
"""Fixture for mocking S3 interaction.

The primary bucket name is "pandas-test". The following datasets
are loaded.

- tips.csv
- tips.csv.gz
- tips.csv.bz2
- items.jsonl

A private bucket "cant_get_it" is also created. The boto3 s3 resource
is yielded by the fixture.
"""
pytest.importorskip('s3fs')
moto.mock_s3().start()

test_s3_files = [
('tips.csv', tips_file),
('tips.csv.gz', tips_file + '.gz'),
('tips.csv.bz2', tips_file + '.bz2'),
('items.jsonl', jsonl_file),
]

def add_tips_files(bucket_name):
for s3_key, file_name in test_s3_files:
with open(file_name, 'rb') as f:
conn.Bucket(bucket_name).put_object(
Key=s3_key,
Body=f)

boto3 = pytest.importorskip('boto3')
# see gh-16135
bucket = 'pandas-test'

conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket=bucket)
add_tips_files(bucket)

conn.create_bucket(Bucket='cant_get_it', ACL='private')
add_tips_files('cant_get_it')

yield conn

moto.mock_s3().stop()
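For orientation (this usage example is hypothetical, not part of the diff), a test consumes the fixture simply by naming it as a parameter; pytest starts moto and populates the buckets before the test body runs:

from pandas import read_csv


def test_tips_from_mocked_s3(s3_resource):
    # 'pandas-test' is the in-memory bucket populated by add_tips_files
    # above; no real AWS credentials or network access are involved.
    df = read_csv('s3://pandas-test/tips.csv')
    assert not df.empty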
20 changes: 18 additions & 2 deletions pandas/tests/io/json/test_pandas.py
@@ -4,7 +4,6 @@
from pandas.compat import (range, lrange, StringIO,
                           OrderedDict, is_platform_32bit)
import os

import numpy as np
from pandas import (Series, DataFrame, DatetimeIndex, Timestamp,
                    read_json, compat)
@@ -985,12 +984,29 @@ def test_tz_range_is_utc(self):
        df = DataFrame({'DT': dti})
        assert dumps(df, iso_dates=True) == dfexp

    def test_read_jsonl(self):
    def test_read_inline_jsonl(self):
        # GH9180
        result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_s3_jsonl(self, s3_resource):
        pytest.importorskip('s3fs')
        # GH17200
Contributor: Add a pytest.importorskip() here I think.

        result = read_json('s3n://pandas-test/items.jsonl', lines=True)
        expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        assert_frame_equal(result, expected)

    def test_read_local_jsonl(self):
        # GH17200
        with ensure_clean('tmp_items.json') as path:
            with open(path, 'w') as infile:
                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
            result = read_json(path, lines=True)
            expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
            assert_frame_equal(result, expected)

    def test_read_jsonl_unicode_chars(self):
        # GH15132: non-ascii unicode characters
        # \u201d == RIGHT DOUBLE QUOTATION MARK
2 changes: 2 additions & 0 deletions pandas/tests/io/parser/data/items.jsonl
@@ -0,0 +1,2 @@
{"a": 1, "b": 2}
Contributor: what is the purpose of this file?

Contributor: I see, ok, you have to have this named .json, otherwise it won't be picked up by setup.py (IOW the install test will fail).

{"b":2, "a" :1}
48 changes: 0 additions & 48 deletions pandas/tests/io/parser/test_network.py
@@ -4,62 +4,14 @@
Tests parsers ability to read and parse non-local files
and hence require a network connection to be read.
"""
import os

import pytest
import moto

import pandas.util.testing as tm
from pandas import DataFrame
from pandas.io.parsers import read_csv, read_table
from pandas.compat import BytesIO


@pytest.fixture(scope='module')
def tips_file():
    return os.path.join(tm.get_data_path(), 'tips.csv')

Contributor: cool


@pytest.fixture(scope='module')
def salaries_table():
    path = os.path.join(tm.get_data_path(), 'salaries.csv')
    return read_table(path)


@pytest.fixture(scope='module')
def s3_resource(tips_file):
    pytest.importorskip('s3fs')
    moto.mock_s3().start()

    test_s3_files = [
        ('tips.csv', tips_file),
        ('tips.csv.gz', tips_file + '.gz'),
        ('tips.csv.bz2', tips_file + '.bz2'),
    ]

    def add_tips_files(bucket_name):
        for s3_key, file_name in test_s3_files:
            with open(file_name, 'rb') as f:
                conn.Bucket(bucket_name).put_object(
                    Key=s3_key,
                    Body=f)

    boto3 = pytest.importorskip('boto3')
    # see gh-16135
    bucket = 'pandas-test'

    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket=bucket)
    add_tips_files(bucket)

    conn.create_bucket(Bucket='cant_get_it', ACL='private')
    add_tips_files('cant_get_it')

    yield conn

    moto.mock_s3().stop()


@pytest.mark.network
@pytest.mark.parametrize(
"compression,extension",