Skip to content

Commit 7c870f0

Browse files
authored
Merge pull request #41 from pylint-dev/25-add-pandas-dataframe-merge-parameter-not-explicitly-set-checker
Add pandas dataframe merge parameter checker
2 parents 1a276fc + c463661 commit 7c870f0

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for proper usage of pandas merge() with explicit parameters and DataFrame naming conventions."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
15+
class PandasDataframeMergeChecker(BaseChecker):
    """Flag ``<obj>.merge(...)`` calls whose join semantics are left implicit.

    A call is reported unless ALL of the following hold:

    * the receiver is a plain name starting with ``df_``;
    * ``how`` is passed as a keyword argument;
    * the join keys are passed as keyword arguments, either via ``on`` or via
      one left-hand key (``left_on`` / ``left_index``) together with one
      right-hand key (``right_on`` / ``right_index``);
    * ``validate`` is passed as a keyword argument.
    """

    name = "pandas-dataframe-merge"
    msgs = {
        "W8110": (
            "Ensure that 'how', 'on', and 'validate' parameters are explicitly specified in pandas DataFrame merge().",
            "pandas-dataframe-merge",
            "Explicitly specifying the 'how', 'on', and 'validate' parameters and using a proper DataFrame naming "
            "convention improves readability and prevents unintended behavior.",
        ),
    }

    # Keyword names that pin down the join key for each side of the merge when
    # the columns differ between frames and a single ``on=`` cannot be used.
    _LEFT_KEY_KEYWORDS = frozenset({"left_on", "left_index"})
    _RIGHT_KEY_KEYWORDS = frozenset({"right_on", "right_index"})

    @only_required_for_messages("pandas-dataframe-merge")
    def visit_call(self, node: nodes.Call) -> None:
        """Emit ``pandas-dataframe-merge`` for an under-specified ``.merge()`` call.

        Args:
            node: The call node being visited. Only attribute calls whose
                attribute name is ``merge`` are inspected; everything else is
                ignored.
        """
        func = node.func
        if not isinstance(func, nodes.Attribute) or func.attrname != "merge":
            return

        # Receivers that are not simple names (calls, subscripts, ...) expose
        # no ``name`` attribute and therefore never satisfy the naming rule.
        receiver_name = getattr(func.expr, "name", "")
        name_is_valid = isinstance(receiver_name, str) and receiver_name.startswith("df_")

        # ``kw.arg`` is None for ``**kwargs`` unpacking, which cannot count as
        # an explicit parameter; it simply never matches the names below.
        passed = {kw.arg for kw in node.keywords}

        # ``on`` cannot be combined with ``left_on``/``right_on`` in pandas, so
        # a per-side key specification is accepted as an equivalent of ``on``.
        keys_specified = "on" in passed or (
            not passed.isdisjoint(self._LEFT_KEY_KEYWORDS) and not passed.isdisjoint(self._RIGHT_KEY_KEYWORDS)
        )

        if name_is_valid and "how" in passed and keys_specified and "validate" in passed:
            return

        self.add_message("pandas-dataframe-merge", node=node, confidence=HIGH)
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_dataframe_merge import PandasDataframeMergeChecker
6+
7+
8+
class TestPandasMergeChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for ``PandasDataframeMergeChecker.visit_call``."""

    CHECKER_CLASS = PandasDataframeMergeChecker

    @staticmethod
    def _merge_call(snippet):
        """Parse *snippet* and return the call node on the right-hand side of its final assignment."""
        assignment = astroid.extract_node(snippet)
        return assignment.value

    def _expect_merge_message(self, snippet):
        """Assert that visiting the merge call in *snippet* emits exactly one ``pandas-dataframe-merge``."""
        call_node = self._merge_call(snippet)
        expected = pylint.testutils.MessageTest(
            msg_id="pandas-dataframe-merge",
            confidence=HIGH,
            node=call_node,
        )
        with self.assertAddsMessages(expected, ignore_position=True):
            self.checker.visit_call(call_node)

    def test_merge_without_explicit_params(self):
        """A bare ``merge(other)`` with no keyword arguments is reported."""
        self._expect_merge_message(
            """
            import pandas as pd
            df_3 = df_1.merge(df_2) # [pandas-dataframe-merge]
            """
        )

    def test_merge_with_missing_validate(self):
        """Passing ``how`` and ``on`` but omitting ``validate`` is still reported."""
        self._expect_merge_message(
            """
            import pandas as pd
            df_3 = df_1.merge(df_2, how='inner', on='col1') # [pandas-dataframe-merge]
            """
        )

    def test_merge_with_wrong_naming_and_missing_params(self):
        """A merge assigned to a non-``df_`` name with no keyword arguments is reported.

        NOTE(review): the receiver in this snippet is ``df_1``; only the
        assignment target lacks the ``df_`` prefix. The message asserted here is
        therefore attributable to the missing keyword arguments -- confirm
        whether a badly named receiver was meant to be exercised as well.
        """
        self._expect_merge_message(
            """
            import pandas as pd
            merged_df = df_1.merge(df_2) # [pandas-dataframe-merge]
            """
        )

    def test_merge_with_all_params_and_correct_naming(self):
        """A ``df_``-named receiver with ``how``, ``on`` and ``validate`` produces no message."""
        call_node = self._merge_call(
            """
            import pandas as pd
            df_merged = df_1.merge(df_2, how='inner', on='col1', validate='1:1') # This should not trigger any warnings
            """
        )

        with self.assertNoMessages():
            self.checker.visit_call(call_node)

0 commit comments

Comments
 (0)