-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
#!/usr/bin/env python3
import pandas as pd
import sys
# The problematic git hash: reading a CSV containing it with pd.read_csv
# segfaults the interpreter (see the captured run output in this report).
# NOTE(review): presumably the leading "2396e87607016390692" is picked up as
# a float literal with an enormous exponent -- confirm against the parser.
PROBLEMATIC_HASH = "2396e87607016390692dc45951f2ba1878ffb39e"
# A working git hash for comparison; pd.read_csv reads it without error.
WORKING_HASH = "7354ea14bf673b25106c5edc37bb92118e4ebf63"
def create_test_csv(hash_value, filename):
    """Write a minimal one-column CSV containing a single hash value.

    The file holds the header line ``GitHash`` followed by ``hash_value``
    on the second line, with no trailing newline.

    Parameters
    ----------
    hash_value : str
        Value written as the only data row.
    filename : str or path-like
        Destination path; an existing file is overwritten.
    """
    content = f"GitHash\n{hash_value}"
    # Explicit encoding so the bytes on disk do not depend on the locale.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
def test_hash(hash_value, description):
    """Write a one-row CSV holding ``hash_value`` and try to read it back.

    The CSV is written to ``test_{description}.csv`` in the current working
    directory and is left in place afterwards.

    Parameters
    ----------
    hash_value : str
        Value placed in the CSV's single data row.
    description : str
        Short label used in the file name and in the progress output.

    Returns
    -------
    bool
        True if ``pd.read_csv`` returned, False if it raised an exception.
    """
    filename = f"test_{description}.csv"
    create_test_csv(hash_value, filename)
    # Flush so this line is visible even if the read below kills the
    # process: buffered stdout is lost when the interpreter segfaults.
    print(f"\n=== Testing {description} hash: {hash_value} ===", flush=True)
    try:
        # Note: if this call segfaults, nothing after it runs and the
        # function never returns.
        df = pd.read_csv(filename)
    except Exception as e:
        print(f"EXCEPTION: {e}")
        return False
    else:
        print(f"SUCCESS: Read {len(df)} rows")
        return True
def main():
    """Run the reproducer: read the known-good hash, then the crashing one.

    Prints the pandas and Python versions, then calls ``test_hash`` for
    ``WORKING_HASH`` followed by ``PROBLEMATIC_HASH``.
    """
    print("Pandas CSV Crash Minimal Reproducer")
    print("=" * 50)
    print(f"pandas version: {pd.__version__}")
    print(f"python version: {sys.version.split()[0]}")
    print()
    # Test the working hash first
    test_hash(WORKING_HASH, "working")
    # Test the problematic hash
    # WARNING: This will likely crash the Python interpreter
    # Flush so the warning is not lost in the stdout buffer if it does.
    print("\nWARNING: The next test will likely crash the interpreter!", flush=True)
    test_hash(PROBLEMATIC_HASH, "problematic")
    print("\nIf you see this message, the crash was avoided and this was a bad repo")
# Run the reproducer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
When I run this on the main branch I get:
Pandas CSV Crash Minimal Reproducer
==================================================
pandas version: 3.0.0.dev0+2466.g5cc3240965
python version: 3.12.3
=== Testing working hash: 7354ea14bf673b25106c5edc37bb92118e4ebf63 ===
SUCCESS: Read 1 rows
WARNING: The next test will likely crash the interpreter!
=== Testing problematic hash: 2396e87607016390692dc45951f2ba1878ffb39e ===
[1] 619586 segmentation fault (core dumped) python minimal_reproducer.py
Issue Description
The hash 2396e87607016390692dc45951f2ba1878ffb39e
appears to cause the crash. I think the code reaches floatify
somewhere via maybe_convert_numeric,
and then something goes wrong in the C code 😄
Expected Behavior
It shouldn't crash; it should fall back to treating the value as a string.
Installed Versions
INSTALLED VERSIONS
commit : 5cc3240
python : 3.12.3
python-bits : 64
OS : Linux
OS-release : 6.6.87.2-microsoft-standard-WSL2
Version : #1 SMP PREEMPT_DYNAMIC Thu Jun 5 18:30:46 UTC 2025
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : C.UTF-8
pandas : 3.0.0.dev0+2466.g5cc3240965
numpy : 2.4.0.dev0+git20251001.8fecead
dateutil : 2.9.0.post0
pip : 25.2
Cython : None
sphinx : None
IPython : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : None
bottleneck : None
fastparquet : None
fsspec : None
html5lib : None
hypothesis : None
gcsfs : None
jinja2 : None
lxml.etree : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
psycopg2 : None
pymysql : None
pyarrow : None
pyiceberg : None
pyreadstat : None
pytest : None
python-calamine : None
pytz : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlsxwriter : None
zstandard : None
qtpy : None
pyqt5 : None