Skip to content

Conversation

@khemkaran10
Copy link
Contributor

Before Fix:

>>> pd.read_csv(
...     io.StringIO("1,2,3"),
...     names=["col1", "col2", "col3"],
...     engine="pyarrow",
... )
---------------------------------------------------------------------------
ArrowInvalid     
...
ParserError: CSV parse error: Empty CSV file or block: cannot infer number of columns

After Fix:

>>> pd.read_csv(
...     io.StringIO("1,2,3"),
...     names=["col1", "col2", "col3"],
...     engine="pyarrow",
... )
   col1  col2  col3
0     1     2     3

@Alvaro-Kothe
Copy link
Member

There are some tests that skip pyarrow because of this. Can you try removing those skips?

@skip_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
def test_float_parser(all_parsers):
    # see gh-9565
    # A single headerless row mixing exponent, plain, trailing-dot and
    # signed-infinity spellings of floats.
    data = "45e-1,4.5,45.,inf,-inf"

    # Each comma-separated field is expected to come back as float(field).
    expected = DataFrame([[float(field) for field in data.split(",")]])

    result = all_parsers.read_csv(StringIO(data), header=None)
    tm.assert_frame_equal(result, expected)

@skip_pyarrow  # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
    "val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
def test_int64_uint64_range(all_parsers, val):
    # The parametrized values are the extreme ends of the int64/uint64
    # ranges, so the parsed frame is expected to hold the integer itself.
    expected = DataFrame([val])

    # The input is one line containing only the number, read without a header.
    result = all_parsers.read_csv(StringIO(str(val)), header=None)
    tm.assert_frame_equal(result, expected)
@skip_pyarrow  # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
    "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
def test_outside_int64_uint64_range(all_parsers, val, request):
    # These values sit one step beyond the int64-uint64 range, so they
    # should be parsed as object.
    if all_parsers.engine == "python":
        # Mark the python-engine case as an expected failure before reading.
        request.applymarker(
            pytest.mark.xfail(reason="TODO: Python engine reads bigint as string")
        )

    expected = DataFrame([val])
    result = all_parsers.read_csv(StringIO(str(val)), header=None)
    tm.assert_frame_equal(result, expected)
@skip_pyarrow  # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
    "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
def test_outside_int64_uint64_range_follow_str(all_parsers, val):
    # An out-of-range integer followed by a non-numeric row: both rows are
    # expected back as strings.
    expected = DataFrame([str(val), "abc"])

    csv_text = f"{val}\nabc"
    result = all_parsers.read_csv(StringIO(csv_text), header=None)
    tm.assert_frame_equal(result, expected)

@khemkaran10 khemkaran10 marked this pull request as draft October 10, 2025 11:56
@github-actions
Copy link
Contributor

This pull request is stale because it has been open for thirty days with no activity. Please update and respond to this comment if you're still interested in working on this.

@github-actions github-actions bot added the Stale label Nov 11, 2025
@mroeschke
Copy link
Member

Thanks for the pull request, but it appears to have gone stale. If interested in continuing, please merge in the main branch, address any review comments and/or failing tests, and we can reopen.

@mroeschke mroeschke closed this Dec 1, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

BUG: read_csv with pyarrow engine cannot handle single-line CSV files

3 participants