Skip to content

Commit 29176c2

Browse files
authored
Merge pull request #38 from pylint-dev/17-add-unnecessary-iteration-avoidance-pandas-checker
Add pandas iterrows checker
2 parents 3e9ae80 + 3360980 commit 29176c2

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for usage of the inefficient pandas DataFrame.iterrows() method."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
15+
class PandasIterrowsChecker(BaseChecker):
    """Pylint checker that reports ``<name>.iterrows()`` calls on DataFrame-like names.

    Detection is purely name-based: the receiver of the call must be a plain
    name for which ``_is_dataframe_name`` returns ``True``. No type inference
    is performed.
    """

    name = "pandas-iterrows"
    msgs = {
        "W8106": (
            "Usage of pandas DataFrame.iterrows() detected",
            "pandas-iterrows",
            "Avoid using DataFrame.iterrows() for large datasets. Consider using vectorized operations or "
            ".itertuples() instead.",
        ),
    }

    @only_required_for_messages("pandas-iterrows")
    def visit_call(self, node: nodes.Call) -> None:
        """Emit ``pandas-iterrows`` when *node* is an ``iterrows()`` call on a ``df_*`` name.

        Args:
            node: The call node being visited.
        """
        callee = node.func

        # Only attribute calls (``something.method(...)``) are of interest.
        if not isinstance(callee, nodes.Attribute):
            return

        if getattr(callee, "attrname", None) != "iterrows":
            return

        # The receiver has a ``name`` only when it is a simple name; for any
        # other expression the lookup yields ``None`` and the call is ignored.
        receiver = getattr(callee.expr, "name", None)
        if not receiver:
            return

        if self._is_dataframe_name(receiver):
            self.add_message("pandas-iterrows", node=node, confidence=HIGH)

    @staticmethod
    def _is_dataframe_name(name: str) -> bool:
        """Return ``True`` when *name* begins with the ``df_`` prefix.

        Args:
            name: Identifier of the object on which ``iterrows`` is called.

        Returns:
            Whether the identifier follows the ``df_`` naming convention used
            here as the DataFrame heuristic.
        """
        prefix = "df_"
        return name[: len(prefix)] == prefix
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_dataframe_iterrows import PandasIterrowsChecker
6+
7+
8+
class TestPandasIterrowsChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for ``PandasIterrowsChecker``."""

    CHECKER_CLASS = PandasIterrowsChecker

    def test_iterrows_used(self):
        """Visiting ``df_sales.iterrows()`` must add one ``pandas-iterrows`` message."""
        snippet = """
        import pandas as pd
        df_sales = pd.DataFrame({
            "Product": ["A", "B", "C"],
            "Sales": [100, 200, 300]
        })
        for index, row in df_sales.iterrows(): # [pandas-iterrows]
            print(row["Product"], row["Sales"])
        """
        loop_stmt = astroid.extract_node(snippet)

        # The extracted node is expected to be the ``for`` statement; its
        # iterable expression is the ``df_sales.iterrows()`` call under test.
        call_under_test = loop_stmt.iter

        expected_message = pylint.testutils.MessageTest(
            msg_id="pandas-iterrows",
            confidence=HIGH,
            node=call_under_test,
        )

        with self.assertAddsMessages(expected_message, ignore_position=True):
            self.checker.visit_call(call_under_test)

0 commit comments

Comments
 (0)