Skip to content

Commit 3e9ae80

Browse files
authored
Merge pull request #37 from pylint-dev/23-add-pandas-dataframe-bool-checker
Add pandas dataframe and series bool checker
2 parents 718be9a + 69107f8 commit 3e9ae80

9 files changed

+247
-5
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for usage of the deprecated pandas DataFrame.bool() method."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
# TODO: state the pandas version in which DataFrame.bool() was deprecated (2.1.0 per the pandas docs - confirm).
15+
16+
17+
class PandasDataFrameBoolChecker(BaseChecker):
    """Report ``.bool()`` calls made on variables named with the ``df_`` prefix."""

    name = "pandas-dataframe-bool"
    msgs = {
        "W8104": (
            "Use of deprecated pandas DataFrame.bool() method",
            "pandas-dataframe-bool",
            "Avoid using the deprecated pandas DataFrame.bool() method.",
        ),
    }

    @only_required_for_messages("pandas-dataframe-bool")
    def visit_call(self, node: nodes.Call) -> None:
        """Emit ``pandas-dataframe-bool`` when *node* is ``<df_name>.bool(...)``."""
        callee = node.func
        # Only attribute-style calls (``obj.method()``) can be a ``.bool()`` call.
        if not isinstance(callee, nodes.Attribute):
            return
        if getattr(callee, "attrname", None) != "bool":
            return

        # The receiver is recognised purely by its variable name: it must
        # start with 'df_'. Receivers without a ``name`` attribute are ignored.
        receiver_name = getattr(callee.expr, "name", None)
        if receiver_name and self._is_valid_dataframe_name(receiver_name):
            self.add_message("pandas-dataframe-bool", node=node, confidence=HIGH)

    @staticmethod
    def _is_valid_dataframe_name(name: str) -> bool:
        """Return True when *name* begins with the ``df_`` prefix."""
        return name.startswith("df_")

pylint_ml/checkers/pandas/pandas_dataframe_naming.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,8 @@ def visit_assign(self, node: nodes.Assign) -> None:
2929
module_name = getattr(node.value.func.expr, "name", None)
3030

3131
if func_name == "DataFrame" and module_name == "pd":
32-
3332
for target in node.targets:
3433
if isinstance(target, nodes.AssignName):
3534
var_name = target.name
3635
if not var_name.startswith("df_") or len(var_name) <= 3:
3736
self.add_message("pandas-dataframe-naming", node=node, confidence=HIGH)
38-
39-
def _check_variable_name(self, var_name, node):
40-
if not var_name.startswith("df_") or len(var_name) <= 3:
41-
self.add_message("pandas-dataframe-naming", node=node, confidence=HIGH)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for usage of the deprecated pandas Series.bool() method."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
# TODO: state the pandas version in which Series.bool() was deprecated (2.1.0 per the pandas docs - confirm).
15+
16+
17+
class PandasSeriesBoolChecker(BaseChecker):
    """Report ``.bool()`` calls made on variables named with the ``ser_`` prefix."""

    name = "pandas-series-bool"
    msgs = {
        "W8105": (
            "Use of deprecated pandas Series.bool() method",
            "pandas-series-bool",
            "Avoid using the deprecated pandas Series.bool() method.",
        ),
    }

    @only_required_for_messages("pandas-series-bool")
    def visit_call(self, node: nodes.Call) -> None:
        """Emit ``pandas-series-bool`` when *node* is ``<ser_name>.bool(...)``."""
        callee = node.func
        # Only attribute-style calls (``obj.method()``) can be a ``.bool()`` call.
        if not isinstance(callee, nodes.Attribute):
            return
        if getattr(callee, "attrname", None) != "bool":
            return

        # The receiver is recognised purely by its variable name: it must
        # start with 'ser_'. Receivers without a ``name`` attribute are ignored.
        receiver_name = getattr(callee.expr, "name", None)
        if receiver_name and self._is_valid_series_name(receiver_name):
            self.add_message("pandas-series-bool", node=node, confidence=HIGH)

    @staticmethod
    def _is_valid_series_name(name: str) -> bool:
        """Return True when *name* begins with the ``ser_`` prefix."""
        return name.startswith("ser_")
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for consistent naming of pandas Series variables."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
15+
class PandasSeriesNamingChecker(BaseChecker):
    """Check that variables assigned from ``pd.Series(...)`` are named ``ser_<text>``."""

    name = "pandas-series-naming"
    msgs = {
        "W8103": (
            "Pandas Series variable names should start with 'ser_' followed by descriptive text",
            "pandas-series-naming",
            "Ensure that pandas Series variables follow the naming convention.",
        ),
    }

    @only_required_for_messages("pandas-series-naming")
    def visit_assign(self, node: nodes.Assign) -> None:
        """Emit ``pandas-series-naming`` for badly named ``pd.Series(...)`` targets.

        A target name is accepted only when it starts with ``ser_`` and has at
        least one further character after the prefix.
        """
        call = node.value
        if not isinstance(call, nodes.Call):
            return

        func = call.func
        # Calls through a plain name (``foo()``) have no ``expr`` attribute;
        # only attribute calls of the form ``pd.Series(...)`` are relevant.
        if not isinstance(func, nodes.Attribute):
            return
        if func.attrname != "Series" or getattr(func.expr, "name", None) != "pd":
            return

        for target in node.targets:
            if not isinstance(target, nodes.AssignName):
                continue
            var_name = target.name
            # ``len <= 4`` rejects the bare prefix "ser_" with no descriptive suffix.
            if not var_name.startswith("ser_") or len(var_name) <= 4:
                self.add_message("pandas-series-naming", node=node, confidence=HIGH)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_dataframe_bool import PandasDataFrameBoolChecker
6+
7+
8+
class TestDataFrameBoolChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for ``PandasDataFrameBoolChecker``."""

    CHECKER_CLASS = PandasDataFrameBoolChecker

    def test_dataframe_bool_usage(self):
        """A ``.bool()`` call on a ``df_``-prefixed variable is reported."""
        call_node = astroid.extract_node(
            """
            import pandas as pd
            df_customers = pd.DataFrame(data)
            df_customers.bool()  # [pandas-dataframe-bool]
            """
        )
        expected = pylint.testutils.MessageTest(
            msg_id="pandas-dataframe-bool",
            confidence=HIGH,
            node=call_node,
        )
        with self.assertAddsMessages(expected, ignore_position=True):
            self.checker.visit_call(call_node)

    def test_no_bool_usage(self):
        """A method other than ``.bool()`` produces no message."""
        call_node = astroid.extract_node(
            """
            import pandas as pd
            df_customers = pd.DataFrame(data)
            df_customers.sum()  # This should pass without warnings
            """
        )
        with self.assertNoMessages():
            self.checker.visit_call(call_node)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_series_bool import PandasSeriesBoolChecker
6+
7+
8+
class TestSeriesBoolChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for ``PandasSeriesBoolChecker``."""

    CHECKER_CLASS = PandasSeriesBoolChecker

    def test_series_bool_usage(self):
        """A ``.bool()`` call on a ``ser_``-prefixed variable is reported."""
        # The checker only recognises receivers whose name starts with 'ser_',
        # so the variable must follow that convention for a message to fire.
        node = astroid.extract_node(
            """
            import pandas as pd
            ser_flags = pd.Series(data)
            ser_flags.bool()  # [pandas-series-bool]
            """
        )
        with self.assertAddsMessages(
            pylint.testutils.MessageTest(
                msg_id="pandas-series-bool",
                confidence=HIGH,
                node=node,
            ),
            # Position fields are not set on the expected message, matching
            # the other checker tests in this change.
            ignore_position=True,
        ):
            self.checker.visit_call(node)

    def test_no_bool_usage(self):
        """A method other than ``.bool()`` on a ``ser_`` variable produces no message."""
        # Uses a 'ser_' name so the test exercises the method-name check
        # rather than passing merely because the receiver name is ignored.
        node = astroid.extract_node(
            """
            import pandas as pd
            ser_flags = pd.Series(data)
            ser_flags.sum()  # This should pass without warnings
            """
        )
        with self.assertNoMessages():
            self.checker.visit_call(node)
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_series_naming import PandasSeriesNamingChecker
6+
7+
8+
class TestPandasSeriesNamingChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for ``PandasSeriesNamingChecker``."""

    CHECKER_CLASS = PandasSeriesNamingChecker

    def test_series_correct_naming(self):
        """A ``ser_``-prefixed name with a descriptive suffix is accepted."""
        assign_node = astroid.extract_node(
            """
            import pandas as pd
            ser_sales = pd.Series([100, 200, 300])
            """
        )
        with self.assertNoMessages():
            self.checker.visit_assign(assign_node)

    def test_series_incorrect_naming(self):
        """A name without the ``ser_`` prefix is reported."""
        assign_node = astroid.extract_node(
            """
            import pandas as pd
            df_sales = pd.Series([100, 200, 300])
            """
        )
        expected = pylint.testutils.MessageTest(
            msg_id="pandas-series-naming",
            confidence=HIGH,
            node=assign_node,
        )
        with self.assertAddsMessages(expected, ignore_position=True):
            self.checker.visit_assign(assign_node)

    def test_series_invalid_length_naming(self):
        """The bare prefix ``ser_`` with nothing after it is reported."""
        assign_node = astroid.extract_node(
            """
            import pandas as pd
            ser_ = pd.Series([True])
            """
        )
        expected = pylint.testutils.MessageTest(
            msg_id="pandas-series-naming",
            confidence=HIGH,
            node=assign_node,
        )
        with self.assertAddsMessages(expected, ignore_position=True):
            self.checker.visit_assign(assign_node)

0 commit comments

Comments
 (0)