Skip to content

Commit adcf512

Browse files
authored
Merge pull request #43 from pylint-dev/20-add-pandas-empty-column-misinitialization-checker
Add pandas dataframe empty column checker
2 parents d5414d0 + 5ea8f0e commit adcf512

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Licensed under the MIT: https://mit-license.org/
2+
# For details: https://github.com/pylint-dev/pylint-ml/LICENSE
3+
# Copyright (c) https://github.com/pylint-dev/pylint-ml/CONTRIBUTORS.txt
4+
5+
"""Check for proper initialization of empty columns in pandas DataFrames."""
6+
7+
from __future__ import annotations
8+
9+
from astroid import nodes
10+
from pylint.checkers import BaseChecker
11+
from pylint.checkers.utils import only_required_for_messages
12+
from pylint.interfaces import HIGH
13+
14+
15+
class PandasEmptyColumnChecker(BaseChecker):
    """Warn when a DataFrame column is created from a filler constant.

    The check is purely syntactic: it looks at subscript assignments of the
    form ``df_<name>[<constant>] = <constant>`` and reports those whose
    right-hand side is ``0`` or ``""``.  Only variables whose name starts with
    ``df_`` are treated as DataFrames.
    """

    name = "pandas-dataframe-empty-column"
    msgs = {
        "W8113": (
            "Avoid using filler values (0, '') for new empty columns. Use 'np.nan' or 'pd.Series(dtype=...)' instead.",
            "pandas-dataframe-empty-column",
            "Initializing new columns with filler values such as 0 or empty strings can lead to issues with null "
            "value detection.",
        ),
    }

    @only_required_for_messages("pandas-dataframe-empty-column")
    def visit_subscript(self, node: nodes.Subscript) -> None:
        """Report ``df_*[<const>] = 0`` and ``df_*[<const>] = ""`` assignments.

        Args:
            node: The subscript expression being visited.
        """
        subscripted = node.value
        if not (isinstance(subscripted, nodes.Name) and subscripted.name.startswith("df_")):
            return
        if not isinstance(node.slice, nodes.Const):
            return

        assignment = node.parent
        if not isinstance(assignment, nodes.Assign):
            return
        assigned = assignment.value
        if not isinstance(assigned, nodes.Const):
            return

        filler = assigned.value
        # ``bool`` is a subclass of ``int`` and ``False == 0``, so a plain
        # membership test would report ``df_x["flag"] = False`` as a ``0``
        # filler (while letting ``True`` through).  Booleans are not one of the
        # filler values this message is about, so skip them explicitly.
        if isinstance(filler, bool):
            return
        if filler in (0, ""):
            self.add_message("pandas-dataframe-empty-column", node=node, confidence=HIGH)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_dataframe_empty_column import PandasEmptyColumnChecker
6+
7+
8+
class TestPandasEmptyColumnChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for ``PandasEmptyColumnChecker.visit_subscript``."""

    CHECKER_CLASS = PandasEmptyColumnChecker

    def test_correct_empty_column_initialization(self):
        """Columns initialised with ``np.nan`` / ``pd.Series(dtype=...)`` are not reported."""
        # Each line ending in ``#@`` is selected by ``extract_node``; with several
        # selections it returns a list of the corresponding statements.  Without
        # the markers only the last statement would be returned, and the checker
        # would never see the individual subscripts.
        assign_nodes = astroid.extract_node(
            """
            import numpy as np
            import pandas as pd
            df_sales = pd.DataFrame()
            df_sales['new_col_float'] = np.nan  #@
            df_sales['new_col_int'] = pd.Series(dtype='int')  #@
            df_sales['new_col_str'] = pd.Series(dtype='object')  #@
            """
        )
        # Guard against the selection silently collapsing to a single node.
        assert len(assign_nodes) == 3

        with self.assertNoMessages():
            for assign_node in assign_nodes:
                # The checker works on the Subscript target, not on the Assign statement.
                self.checker.visit_subscript(assign_node.targets[0])

    def test_incorrect_empty_column_initialization_with_zero(self):
        """A column initialised with ``0`` is reported."""
        node = astroid.extract_node(
            """
            import pandas as pd
            df_sales = pd.DataFrame()
            df_sales['new_col_int'] = 0  # [pandas-dataframe-empty-column]
            """
        )

        # Extract the Subscript node for the df_sales['new_col_int'] assignment
        subscript_node = node.targets[0]

        with self.assertAddsMessages(
            pylint.testutils.MessageTest(
                msg_id="pandas-dataframe-empty-column",
                confidence=HIGH,
                node=subscript_node,
            ),
            ignore_position=True,
        ):
            self.checker.visit_subscript(subscript_node)

    def test_incorrect_empty_column_initialization_with_empty_string(self):
        """A column initialised with ``''`` is reported."""
        node = astroid.extract_node(
            """
            import pandas as pd
            df_sales = pd.DataFrame()
            df_sales['new_col_str'] = ''  # [pandas-dataframe-empty-column]
            """
        )

        # Extract the Subscript node for the df_sales['new_col_str'] assignment
        subscript_node = node.targets[0]

        with self.assertAddsMessages(
            pylint.testutils.MessageTest(
                msg_id="pandas-dataframe-empty-column",
                confidence=HIGH,
                node=subscript_node,
            ),
            ignore_position=True,
        ):
            self.checker.visit_subscript(subscript_node)

0 commit comments

Comments
 (0)