Skip to content

Commit 290c04e

Browse files
author
Peter Hamfelt
committed
Add pandas dtype parameter checker
1 parent adcf512 commit 290c04e

File tree

4 files changed

+69
-0
lines changed

4 files changed

+69
-0
lines changed

pylint_ml/checkers/pandas/pandas_copy_on_write.py

Whitespace-only changes.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from astroid import nodes
2+
from pylint.checkers import BaseChecker
3+
from pylint.checkers.utils import only_required_for_messages
4+
from pylint.interfaces import HIGH
5+
6+
7+
class PandasDtypeChecker(BaseChecker):
    """Warn when a pandas read function is called without an explicit ``dtype``.

    The checker looks at attribute calls of the form ``<name>.<function>(...)``
    where ``<name>`` is one of ``_PANDAS_NAMES`` and ``<function>`` is one of
    ``_READ_FUNCTIONS``. The match is purely name-based; no inference is done
    to confirm that the name really refers to the pandas module.
    """

    name = "pandas-dtype-param"
    # NOTE(review): the message template contains a '%s' placeholder, but
    # ``visit_call`` passes no ``args`` to ``add_message``, so the placeholder
    # appears to be emitted unformatted -- confirm. Supplying ``args`` has to
    # be coordinated with the ``MessageTest`` expectations in the test suite.
    msgs = {
        "W8117": (
            "Specify 'dtype' when using '%s' for better performance and data integrity.",
            "pandas-dtype-param",
            "It's recommended to explicitly specify the 'dtype' parameter in pandas read functions.",
        ),
    }

    # Names under which the pandas module is expected to be referenced.
    _PANDAS_NAMES = frozenset({"pd", "pandas"})
    # pandas read functions for which an explicit ``dtype`` is expected.
    _READ_FUNCTIONS = frozenset({"read_csv", "read_excel", "read_table"})

    @only_required_for_messages("pandas-dtype-param")
    def visit_call(self, node: nodes.Call) -> None:
        """Report ``pandas-dtype-param`` for read calls lacking a ``dtype`` keyword.

        Args:
            node: The call node being visited.
        """
        func = node.func
        if not isinstance(func, nodes.Attribute):
            return

        # ``func.expr`` is only usable here when it is a plain name (``pd``);
        # anything without a ``name`` attribute yields None and is skipped.
        module_name = getattr(func.expr, "name", None)
        if module_name not in self._PANDAS_NAMES:
            return
        if func.attrname not in self._READ_FUNCTIONS:
            return

        # A ``**kwargs`` unpacking is represented by a keyword whose ``arg`` is
        # None; ``dtype`` may be supplied through it, so such calls are not
        # reported rather than risking a false positive at HIGH confidence.
        if any(kw.arg is None or kw.arg == "dtype" for kw in node.keywords):
            return

        self.add_message("pandas-dtype-param", node=node, confidence=HIGH)

tests/checkers/test_pandas/test_pandas_copy_on_write.py

Whitespace-only changes.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import astroid
2+
import pylint.testutils
3+
from pylint.interfaces import HIGH
4+
5+
from pylint_ml.checkers.pandas.pandas_dtype_param import PandasDtypeChecker
6+
7+
8+
class TestPandasDtypeChecker(pylint.testutils.CheckerTestCase):
    """Tests for ``PandasDtypeChecker.visit_call``."""

    CHECKER_CLASS = PandasDtypeChecker

    def test_dtype_specified(self):
        """No message is emitted when ``dtype`` is passed to ``pd.read_csv``."""
        node = astroid.extract_node(
            """
            import pandas as pd
            df = pd.read_csv('file.csv', dtype={'column1': 'int32'})
            """
        )

        # The extracted node is the assignment; its value is the call itself.
        dtype_call = node.value

        with self.assertNoMessages():
            self.checker.visit_call(dtype_call)

    def test_dtype_missing(self):
        """``pandas-dtype-param`` is emitted when ``dtype`` is omitted."""
        node = astroid.extract_node(
            """
            import pandas as pd
            df_sales = pd.read_csv('file.csv')
            """
        )

        # The extracted node is the assignment; its value is the call itself.
        dtype_call = node.value

        with self.assertAddsMessages(
            pylint.testutils.MessageTest(
                msg_id="pandas-dtype-param",
                confidence=HIGH,
                node=dtype_call,
            ),
            ignore_position=True,
        ):
            self.checker.visit_call(dtype_call)

0 commit comments

Comments
 (0)