pandas-dev · sanggon6107 · Jul 25, 2025 · Jul 27, 2025 · Jul 27, 2025
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -1201,7 +1201,7 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]:
 
                 if actual_len > col_len:
                     if callable(self.on_bad_lines):
-                        new_l = self.on_bad_lines(_content)
+                        new_l = self.on_bad_lines(col_len, actual_len, i + 2, _content)
                         if new_l is not None:
                             content.append(new_l)  # pyright: ignore[reportArgumentType]
                     elif self.on_bad_lines in (

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -414,7 +414,10 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
     - ``'skip'``, skip bad lines without raising or warning when they are encountered.
     - Callable, function that will process a single bad line.
         - With ``engine='python'``, function with signature
-          ``(bad_line: list[str]) -> list[str] | None``.
+          ``(expected_columns: int, actual_columns: int, row: int, bad_line: list[str]) -> list[str] | None``.
+          ``expected_columns`` is the number of columns the parser expects.
+          ``actual_columns`` is the number of fields found in the bad line.
+          ``row`` is the row number of the bad line.
           ``bad_line`` is a list of strings split by the ``sep``.
           If the function returns ``None``, the bad line will be ignored.
           If the function returns a new ``list`` of strings with more elements than

diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
@@ -341,7 +341,9 @@ def readline(self):
     parser.read_csv(NoNextBuffer("a\n1"))
 
 
-@pytest.mark.parametrize("bad_line_func", [lambda x: ["2", "3"], lambda x: x[:2]])
+@pytest.mark.parametrize(
+    "bad_line_func", [lambda x, y, z, a: ["2", "3"], lambda x, y, z, a: a[:2]]
+)
 def test_on_bad_lines_callable(python_parser_only, bad_line_func):
     # GH 5686
     parser = python_parser_only
@@ -367,7 +369,9 @@ def test_on_bad_lines_callable_write_to_external_list(python_parser_only):
     bad_sio = StringIO(data)
     lst = []
 
-    def bad_line_func(bad_line: list[str]) -> list[str]:
+    def bad_line_func(
+        expected_columns: int, actual_columns: int, row: int, bad_line: list[str]
+    ) -> list[str]:
         lst.append(bad_line)
         return ["2", "3"]
 
@@ -377,7 +381,9 @@ def bad_line_func(bad_line: list[str]) -> list[str]:
     assert lst == [["2", "3", "4", "5", "6"]]
 
 
-@pytest.mark.parametrize("bad_line_func", [lambda x: ["foo", "bar"], lambda x: x[:2]])
+@pytest.mark.parametrize(
+    "bad_line_func", [lambda x, y, z, a: ["foo", "bar"], lambda x, y, z, a: a[:2]]
+)
 @pytest.mark.parametrize("sep", [",", "111"])
 def test_on_bad_lines_callable_iterator_true(python_parser_only, bad_line_func, sep):
     # GH 5686
@@ -414,7 +420,7 @@ def test_on_bad_lines_callable_dont_swallow_errors(python_parser_only):
     bad_sio = StringIO(data)
     msg = "This function is buggy."
 
-    def bad_line_func(bad_line):
+    def bad_line_func(expected_columns, actual_columns, row, bad_line):
         raise ValueError(msg)
 
     with pytest.raises(ValueError, match=msg):
@@ -432,7 +438,10 @@ def test_on_bad_lines_callable_not_expected_length(python_parser_only):
     bad_sio = StringIO(data)
 
     result = parser.read_csv_check_warnings(
-        ParserWarning, "Length of header or names", bad_sio, on_bad_lines=lambda x: x
+        ParserWarning,
+        "Length of header or names",
+        bad_sio,
+        on_bad_lines=lambda x, y, z, a: a,
     )
     expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
     tm.assert_frame_equal(result, expected)
@@ -448,7 +457,7 @@ def test_on_bad_lines_callable_returns_none(python_parser_only):
 """
     bad_sio = StringIO(data)
 
-    result = parser.read_csv(bad_sio, on_bad_lines=lambda x: None)
+    result = parser.read_csv(bad_sio, on_bad_lines=lambda x, y, z, a: None)
     expected = DataFrame({"a": [1, 3], "b": [2, 4]})
     tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
@@ -154,7 +154,7 @@ def test_on_bad_lines_callable_python_or_pyarrow(self, all_parsers):
         # GH 5686
         # GH 54643
         sio = StringIO("a,b\n1,2")
-        bad_lines_func = lambda x: x
+        bad_lines_func = lambda x, y, z, a: a
         parser = all_parsers
         if all_parsers.engine not in ["python", "pyarrow"]:
             msg = (