Skip to content

Commit d5414d0

Browse files
authored
Merge pull request #42 from pylint-dev/21-add-pandas-convert-df-to-numpy-checker
Add pandas dataframe values checker
2 parents 7c870f0 + 6e2669d commit d5414d0

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for the usage of pandas.DataFrame.values and suggest .to_numpy() instead."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
15+
class PandasValuesChecker(BaseChecker):
    """Report ``.values`` attribute reads on names that start with ``df_``.

    Detection is purely name-based: no type inference is performed, so only
    plain names carrying the ``df_`` prefix are considered.
    """

    name = "pandas-dataframe-values"
    msgs = {
        "W8112": (
            "Avoid using 'DataFrame.values'. Use '.to_numpy()' instead for better consistency and compatibility.",
            "pandas-dataframe-values",
            "Using 'DataFrame.values' is discouraged as it may not always return a NumPy array. Use '.to_numpy()' "
            "instead.",
        ),
    }

    @only_required_for_messages("pandas-dataframe-values")
    def visit_attribute(self, node: nodes.Attribute) -> None:
        """Emit ``pandas-dataframe-values`` for ``df_<something>.values``.

        Args:
            node: The attribute-access node being visited.
        """
        target = node.expr

        # Only a bare name on the left of the dot is inspected; chained
        # attribute accesses, calls, subscripts, etc. are ignored.
        if not isinstance(target, nodes.Name):
            return

        if node.attrname != "values":
            return

        # The ``df_`` prefix is the only signal used to decide that the name
        # refers to a DataFrame.
        if not target.name.startswith("df_"):
            return

        self.add_message("pandas-dataframe-values", node=node, confidence=HIGH)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_dataframe_values import PandasValuesChecker
6+
7+
8+
class TestPandasValuesChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for ``PandasValuesChecker``."""

    CHECKER_CLASS = PandasValuesChecker

    def test_values_usage_with_correct_naming(self):
        """``df_<name>.values`` must raise ``pandas-dataframe-values``."""
        node = astroid.extract_node(
            """
            import pandas as pd
            df_sales = pd.DataFrame({
                "A": [1, 2, 3],
                "B": [4, 5, 6]
            })
            data = df_sales.values  # [pandas-dataframe-values]
            """
        )

        # extract_node returns the last statement (the assignment); its
        # right-hand side is the ``df_sales.values`` attribute access.
        attribute_node = node.value

        with self.assertAddsMessages(
            pylint.testutils.MessageTest(
                msg_id="pandas-dataframe-values",
                confidence=HIGH,
                node=attribute_node,
            ),
            ignore_position=True,
        ):
            self.checker.visit_attribute(attribute_node)

    def test_no_warning_for_to_numpy(self):
        """``df_<name>.to_numpy()`` must not raise any message."""
        node = astroid.extract_node(
            """
            import pandas as pd
            df_sales = pd.DataFrame({
                "A": [1, 2, 3],
                "B": [4, 5, 6]
            })
            df_data = df_sales.to_numpy()  # This should not trigger any warnings
            """
        )

        # The assignment's value is the call ``df_sales.to_numpy()``; the
        # callee of that call is the attribute access ``df_sales.to_numpy``.
        # The checker only defines ``visit_attribute``, so that attribute
        # node is what has to be fed to it for this test to exercise the
        # checker logic at all.
        attribute_node = node.value.func

        with self.assertNoMessages():
            self.checker.visit_attribute(attribute_node)

0 commit comments

Comments
 (0)