Skip to content

Commit de9c090

Browse files
authored
Merge pull request #44 from pylint-dev/19-add-pandas-column-datatype-not-explicitly-set-checker
Add pandas dtype parameter checker
2 parents adcf512 + 27f5213 commit de9c090

File tree

5 files changed

+69
-15
lines changed

5 files changed

+69
-15
lines changed

pylint_ml/checkers/pandas/pandas_copy_on_write.py

Whitespace-only changes.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from astroid import nodes
2+
from pylint.checkers import BaseChecker
3+
from pylint.checkers.utils import only_required_for_messages
4+
from pylint.interfaces import HIGH
5+
6+
7+
class PandasDtypeChecker(BaseChecker):
    """Warn when a pandas read function is called without an explicit ``dtype``.

    Only attribute calls written as ``pd.<name>(...)`` are inspected, where
    ``<name>`` is ``read_csv``, ``read_excel`` or ``read_table``.
    """

    name = "pandas-dtype-param"
    msgs = {
        "W8117": (
            # The template deliberately has no ``%s`` placeholder: the message
            # is emitted without ``args``, so a placeholder would presumably be
            # shown to the user verbatim instead of being substituted.
            "Specify 'dtype' when using pandas read functions for better performance and data integrity.",
            "pandas-dtype-param",
            "It's recommended to explicitly specify the 'dtype' parameter in pandas read functions.",
        ),
    }

    @only_required_for_messages("pandas-dtype-param")
    def visit_call(self, node: nodes.Call) -> None:
        """Emit ``pandas-dtype-param`` for ``pd`` read calls lacking ``dtype``.

        Args:
            node: The call node being visited.
        """
        # Only ``<expr>.<attr>(...)`` calls can be ``pd.read_*`` calls.
        if not isinstance(node.func, nodes.Attribute):
            return

        module_name = getattr(node.func.expr, "name", None)
        func_name = node.func.attrname
        if module_name != "pd" or func_name not in {"read_csv", "read_excel", "read_table"}:
            return

        # Guard against a missing keyword list rather than iterating ``None``.
        keywords = node.keywords or ()

        # A ``**mapping`` argument appears as a keyword whose ``arg`` is None.
        # ``dtype`` may be supplied through it, so the call cannot be judged
        # statically and is not reported.
        if any(kw.arg is None or kw.arg == "dtype" for kw in keywords):
            return

        self.add_message("pandas-dtype-param", node=node, confidence=HIGH)

tests/checkers/test_pandas/test_pandas_copy_on_write.py

Whitespace-only changes.

tests/checkers/test_pandas/test_pandas_dataframe_values.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,3 @@ def test_values_usage_with_correct_naming(self):
3232
ignore_position=True,
3333
):
3434
self.checker.visit_attribute(attribute_node)
35-
36-
def test_no_warning_for_to_numpy(self):
    """Pass the node extracted from a ``to_numpy()`` snippet to ``visit_call``.

    The checker is expected to add no messages for it.
    """
    extracted = astroid.extract_node(
        """
        import pandas as pd
        df_sales = pd.DataFrame({
            "A": [1, 2, 3],
            "B": [4, 5, 6]
        })
        df_data = df_sales.to_numpy()  # This should not trigger any warnings
        """
    )

    with self.assertNoMessages():
        self.checker.visit_call(extracted)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_dtype_param import PandasDtypeChecker
6+
7+
8+
class TestPandasDtypeChecker(pylint.testutils.CheckerTestCase):
    """Unit tests for the ``pandas-dtype-param`` checker."""

    CHECKER_CLASS = PandasDtypeChecker

    def test_dtype_specified(self):
        """A ``pd.read_csv`` call that passes ``dtype`` adds no message."""
        assignment = astroid.extract_node(
            """
            import pandas as pd
            df = pd.read_csv('file.csv', dtype={'column1': 'int32'})
            """
        )
        # The checker visits the call on the right-hand side of the assignment.
        read_call = assignment.value

        with self.assertNoMessages():
            self.checker.visit_call(read_call)

    def test_dtype_missing(self):
        """A ``pd.read_csv`` call without ``dtype`` adds ``pandas-dtype-param``."""
        assignment = astroid.extract_node(
            """
            import pandas as pd
            df_sales = pd.read_csv('file.csv')
            """
        )
        read_call = assignment.value

        expected_message = pylint.testutils.MessageTest(
            msg_id="pandas-dtype-param",
            confidence=HIGH,
            node=read_call,
        )
        with self.assertAddsMessages(expected_message, ignore_position=True):
            self.checker.visit_call(read_call)

0 commit comments

Comments
 (0)