Skip to content

Commit bcd3002

Browse files
author
Peter Hamfelt
committed
Add pandas iterrows checker
1 parent 1df441b commit bcd3002

17 files changed

+456
-0
lines changed

.idea/.gitignore

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/inspectionProfiles/Project_Default.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/inspectionProfiles/profiles_settings.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/misc.xml

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/pylint-ml.iml

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/vcs.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for usage of the deprecated pandas DataFrame.bool() method."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
# TODO: record the pandas version in which DataFrame.bool() was deprecated.
15+
16+
17+
class PandasDataFrameBoolChecker(BaseChecker):
    """Report ``.bool()`` calls made on variables named like DataFrames.

    Detection is purely name-based: the receiver of the call must be a
    simple name that starts with ``df_``. No type inference is performed.
    """

    name = "pandas-dataframe-bool"
    msgs = {
        "W8104": (
            "Use of deprecated pandas DataFrame.bool() method",
            "pandas-dataframe-bool",
            "Avoid using the deprecated pandas DataFrame.bool() method.",
        ),
    }

    @only_required_for_messages("pandas-dataframe-bool")
    def visit_call(self, node: nodes.Call) -> None:
        """Emit ``pandas-dataframe-bool`` for ``df_<something>.bool(...)`` calls."""
        callee = node.func
        # Only method-style calls (``receiver.method(...)``) are of interest.
        if not isinstance(callee, nodes.Attribute):
            return
        if getattr(callee, "attrname", None) != "bool":
            return

        # The receiver must be a plain name; anything without a ``name``
        # attribute (or with an empty one) is ignored.
        receiver = getattr(callee.expr, "name", None)
        if not receiver:
            return
        if self._is_valid_dataframe_name(receiver):
            self.add_message("pandas-dataframe-bool", node=node, confidence=HIGH)

    @staticmethod
    def _is_valid_dataframe_name(name: str) -> bool:
        """Return True when ``name`` carries the ``df_`` prefix."""
        prefix = "df_"
        return name.startswith(prefix)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for usage of the inefficient pandas DataFrame.iterrows() method."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
15+
class PandasIterrowsChecker(BaseChecker):
    """Report ``.iterrows()`` calls made on variables named like DataFrames.

    A call is reported only when its receiver is a simple name starting
    with ``df_``; the check is a naming heuristic, not type inference.
    """

    name = "pandas-iterrows"
    msgs = {
        "W8106": (
            "Usage of pandas DataFrame.iterrows() detected",
            "pandas-iterrows",
            "Avoid using DataFrame.iterrows() for large datasets. Consider using vectorized operations or "
            ".itertuples() instead.",
        ),
    }

    @only_required_for_messages("pandas-iterrows")
    def visit_call(self, node: nodes.Call) -> None:
        """Emit ``pandas-iterrows`` for ``df_<something>.iterrows(...)`` calls."""
        func = node.func
        is_iterrows_call = isinstance(func, nodes.Attribute) and getattr(func, "attrname", None) == "iterrows"
        if not is_iterrows_call:
            return

        # Receivers that are not plain names expose no ``name`` and are skipped.
        owner = getattr(func.expr, "name", None)
        if owner and self._is_dataframe_name(owner):
            self.add_message("pandas-iterrows", node=node, confidence=HIGH)

    @staticmethod
    def _is_dataframe_name(name: str) -> bool:
        """Return True when ``name`` looks like a DataFrame variable (``df_`` prefix)."""
        looks_like_dataframe = name.startswith("df_")
        return looks_like_dataframe
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for consistent naming of pandas DataFrame variables."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
15+
class PandasDataFrameNamingChecker(BaseChecker):
    """Enforce the ``df_<description>`` naming convention for DataFrames.

    Only assignments whose right-hand side is a direct ``pd.DataFrame(...)``
    call are inspected; the module must be referenced by the literal name
    ``pd``.
    """

    name = "pandas-dataframe-naming"
    msgs = {
        "W8103": (
            "Pandas DataFrame variable names should start with 'df_' followed by descriptive text",
            "pandas-dataframe-naming",
            "Ensure that pandas DataFrame variables follow the naming convention.",
        ),
    }

    @only_required_for_messages("pandas-dataframe-naming")
    def visit_assign(self, node: nodes.Assign) -> None:
        """Emit ``pandas-dataframe-naming`` for badly named ``pd.DataFrame`` targets.

        A target name is accepted when it starts with ``df_`` and has at
        least one character after that prefix. One message is added per
        offending simple-name target, attached to the assignment node.
        """
        call = node.value
        if not isinstance(call, nodes.Call):
            return

        func = call.func
        # Only attribute calls (``module.func(...)``) carry an ``expr``.
        # Plain calls such as ``foo()`` have a ``nodes.Name`` callee, and
        # reading ``.expr`` on it would raise AttributeError.
        if not isinstance(func, nodes.Attribute):
            return
        if func.attrname != "DataFrame" or getattr(func.expr, "name", None) != "pd":
            return

        for target in node.targets:
            # Tuple unpacking, attribute and subscript targets are not checked.
            if not isinstance(target, nodes.AssignName):
                continue
            var_name = target.name
            if not var_name.startswith("df_") or len(var_name) <= 3:
                self.add_message("pandas-dataframe-naming", node=node, confidence=HIGH)

0 commit comments

Comments
 (0)