Rule S6982: module mode should be set after load_state_dict (#1976)

Seppli11 · web-flow · commit 0068790ea1a1 · 2024-09-19T14:17:14.000+02:00
* SONARPY-1910 add metadata * SONARPY-1910 Rule S6982: module mode should be set after load_state_dict * SONARPY-1910 add license header and update Checklist * SONARPY-1910 fix SQ issues * SONARPY-1910 address pr comments
diff --git a/python-checks/src/main/java/org/sonar/python/checks/CheckList.java b/python-checks/src/main/java/org/sonar/python/checks/CheckList.java
@@ -368,6 +368,7 @@ public static Iterable<Class> getChecks() {
       TooManyReturnsCheck.class,
       TorchAutogradVariableShouldNotBeUsedCheck.class,
       TorchLoadLeadsToUntrustedCodeExecutionCheck.class,
+      TorchModuleModeShouldBeSetAfterLoadingCheck.class,
       TrailingCommentCheck.class,
       TrailingWhitespaceCheck.class,
       TypeAliasAnnotationCheck.class,
diff --git a/python-checks/src/main/java/org/sonar/python/checks/TorchModuleModeShouldBeSetAfterLoadingCheck.java b/python-checks/src/main/java/org/sonar/python/checks/TorchModuleModeShouldBeSetAfterLoadingCheck.java
@@ -0,0 +1,133 @@
+/*
+ * SonarQube Python Plugin
+ * Copyright (C) 2011-2024 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.python.checks;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import org.sonar.check.Rule;
+import org.sonar.plugins.python.api.PythonSubscriptionCheck;
+import org.sonar.plugins.python.api.symbols.Symbol;
+import org.sonar.plugins.python.api.symbols.Usage;
+import org.sonar.plugins.python.api.tree.Argument;
+import org.sonar.plugins.python.api.tree.CallExpression;
+import org.sonar.plugins.python.api.tree.Expression;
+import org.sonar.plugins.python.api.tree.Name;
+import org.sonar.plugins.python.api.tree.QualifiedExpression;
+import org.sonar.plugins.python.api.tree.RegularArgument;
+import org.sonar.plugins.python.api.tree.Tree;
+import org.sonar.python.cfg.fixpoint.ReachingDefinitionsAnalysis;
+import org.sonar.python.tree.TreeUtils;
+
+/**
+ * Rule S6982: reports {@code <receiver>.load_state_dict(...)} calls whose argument is (or resolves to) a
+ * {@code torch.load(...)} call when the receiver is not used inside any call expression afterwards.
+ *
+ * <p>A later usage of the receiver name inside a call expression (for example {@code model.eval()},
+ * {@code model.train()} or {@code foo(model)}) suppresses the issue. When the receiver is not a plain name,
+ * or its symbol cannot be resolved, no later usage can be found and the issue is raised.
+ */
+@Rule(key = "S6982")
+public class TorchModuleModeShouldBeSetAfterLoadingCheck extends PythonSubscriptionCheck {
+  /** Simple (unqualified) names of the methods that switch a module to evaluation or training mode. */
+  private static final Set<String> STATE_SETTING_METHOD_NAMES = Set.of("eval", "train");
+  private static final String TORCH_LOAD_FQN = "torch.load";
+  private static final String LOAD_STATE_DICT_NAME = "load_state_dict";
+  private static final String MESSAGE = "Set the module in training or evaluation mode.";
+  /** Upper bound on how many name-to-definition hops are followed when looking for {@code torch.load}. */
+  private static final int IS_TORCH_LOAD_CALL_MAX_RECURSIVE_COUNTER = 10;
+
+  /** Replaced with a fresh analysis each time a FILE_INPUT node is visited. */
+  private ReachingDefinitionsAnalysis reachingDefinitionsAnalysis;
+
+  /**
+   * Registers the two consumers of this check: one that creates the reaching-definitions analysis for the
+   * visited file, and one that inspects every call expression.
+   *
+   * @param context the registration context provided by the analyzer
+   */
+  @Override
+  public void initialize(Context context) {
+    context.registerSyntaxNodeConsumer(Tree.Kind.FILE_INPUT, ctx -> reachingDefinitionsAnalysis =
+      new ReachingDefinitionsAnalysis(ctx.pythonFile()));
+
+    context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, ctx -> {
+      CallExpression callExpr = (CallExpression) ctx.syntaxNode();
+      // Filter on the callee name first so that symbol usages are only collected for real candidates
+      // instead of for every call expression of the file.
+      if (!isLoadStateDictCall(callExpr)) {
+        return;
+      }
+      List<Usage> receiverUsages = getForwardUsages(callExpr);
+      if (!hasEvalOrTrainUsage(receiverUsages) && !isModelPassedOn(receiverUsages)) {
+        ctx.addIssue(callExpr.callee(), MESSAGE);
+      }
+    });
+  }
+
+  /**
+   * Tells whether the call looks like {@code <something>.load_state_dict(<torch.load result>)}.
+   *
+   * @param callExpr the call expression to inspect
+   * @return {@code true} when the callee is a qualified expression named {@code load_state_dict} and at least
+   *         one regular argument is, or resolves to, a {@code torch.load(...)} call
+   */
+  private boolean isLoadStateDictCall(CallExpression callExpr) {
+    // To properly check that the right load_state_dict is called, typeshed type information would be required.
+    // Since this is currently not possible, we check whether the argument of load_state_dict is torch.load(...),
+    // assuming that a load_state_dict receiving the result of torch.load is most likely the PyTorch one.
+    if (callExpr.callee() instanceof QualifiedExpression qualifiedExpr) {
+      return LOAD_STATE_DICT_NAME.equals(qualifiedExpr.name().name()) && containsTorchLoadCall(callExpr.arguments());
+    }
+    return false;
+  }
+
+  /**
+   * @param args the arguments of a call
+   * @return {@code true} when any {@link RegularArgument} is, or resolves to, a {@code torch.load(...)} call
+   */
+  private boolean containsTorchLoadCall(List<Argument> args) {
+    return args.stream()
+      .flatMap(TreeUtils.toStreamInstanceOfMapper(RegularArgument.class))
+      .anyMatch(arg -> isTorchLoadCall(arg.expression(), 0));
+  }
+
+  /**
+   * Tells whether an expression is a {@code torch.load(...)} call, following names to the values the
+   * reaching-definitions analysis reports for them.
+   *
+   * @param expr the expression to inspect
+   * @param recursiveCounter number of name hops already followed; the search gives up once it exceeds
+   *        {@link #IS_TORCH_LOAD_CALL_MAX_RECURSIVE_COUNTER}
+   * @return {@code true} when the expression is, or may be assigned from, a call whose callee symbol has the
+   *         fully qualified name {@code torch.load}
+   */
+  private boolean isTorchLoadCall(Expression expr, int recursiveCounter) {
+    if (recursiveCounter > IS_TORCH_LOAD_CALL_MAX_RECURSIVE_COUNTER) {
+      return false;
+    }
+    if (expr instanceof CallExpression callExpr) {
+      Symbol calleeSymbol = callExpr.calleeSymbol();
+      return calleeSymbol != null && TORCH_LOAD_FQN.equals(calleeSymbol.fullyQualifiedName());
+    }
+    if (expr instanceof Name name) {
+      return reachingDefinitionsAnalysis.valuesAtLocation(name).stream()
+        .anyMatch(definitionExpr -> isTorchLoadCall(definitionExpr, recursiveCounter + 1));
+    }
+    return false;
+  }
+
+  /**
+   * Collects the usages of the call receiver that start after the end of the given call.
+   *
+   * @param callExpr the {@code load_state_dict} call
+   * @return the usages of the receiver symbol located after the call's last token; empty when the receiver is
+   *         not a plain name or has no symbol
+   */
+  private static List<Usage> getForwardUsages(CallExpression callExpr) {
+    List<Usage> usages = getFunctionCallReceiverName(callExpr)
+      .map(Name::symbol)
+      .map(Symbol::usages)
+      .orElse(Collections.emptyList());
+
+    return usages.stream()
+      .filter(usage -> startsAfter(usage.tree(), callExpr))
+      .toList();
+  }
+
+  /**
+   * Position comparison on (line, column) rather than on line only, so that a usage written on the same line
+   * after the call is kept, and a usage nested in the arguments of a multi-line call is not mistaken for a
+   * later one.
+   *
+   * @param candidate the tree whose start position is tested
+   * @param reference the tree whose end position is the threshold
+   * @return {@code true} when {@code candidate} starts strictly after the last token of {@code reference}
+   */
+  private static boolean startsAfter(Tree candidate, Tree reference) {
+    int candidateLine = candidate.firstToken().line();
+    int referenceLine = reference.lastToken().line();
+    if (candidateLine != referenceLine) {
+      return candidateLine > referenceLine;
+    }
+    return candidate.firstToken().column() > reference.lastToken().column();
+  }
+
+  /**
+   * @param callExpr a call expression
+   * @return the qualifier of the callee when the callee is a qualified expression whose qualifier is a plain
+   *         {@link Name} (e.g. {@code model} in {@code model.load_state_dict(...)}); empty otherwise
+   */
+  private static Optional<Name> getFunctionCallReceiverName(CallExpression callExpr) {
+    return Optional.ofNullable(callExpr.callee())
+      .flatMap(TreeUtils.toOptionalInstanceOfMapper(QualifiedExpression.class))
+      .map(QualifiedExpression::qualifier)
+      .flatMap(TreeUtils.toOptionalInstanceOfMapper(Name.class));
+  }
+
+  /**
+   * Note: every usage accepted here is nested in a call expression and is therefore also accepted by
+   * {@link #isModelPassedOn(List)}; the check is kept separate to state the intent of the rule explicitly.
+   *
+   * @param usages usages of the receiver located after the {@code load_state_dict} call
+   * @return {@code true} when one of them is nested in a call whose callee symbol is named {@code eval} or
+   *         {@code train}
+   */
+  private static boolean hasEvalOrTrainUsage(List<Usage> usages) {
+    return usages.stream().anyMatch(TorchModuleModeShouldBeSetAfterLoadingCheck::isEvalOrTrain);
+  }
+
+  /**
+   * @param usage a usage of the receiver
+   * @return {@code true} when the closest enclosing call expression has a callee symbol named {@code eval} or
+   *         {@code train}
+   */
+  private static boolean isEvalOrTrain(Usage usage) {
+    if (TreeUtils.firstAncestorOfKind(usage.tree(), Tree.Kind.CALL_EXPR) instanceof CallExpression usageCall) {
+      Symbol usageCallSymbol = usageCall.calleeSymbol();
+      return usageCallSymbol != null && STATE_SETTING_METHOD_NAMES.contains(usageCallSymbol.name());
+    }
+    return false;
+  }
+
+  /**
+   * @param usages usages of the receiver located after the {@code load_state_dict} call
+   * @return {@code true} when one of them is nested in a call expression, in which case the mode could be set
+   *         by the called code and no issue is raised
+   */
+  private static boolean isModelPassedOn(List<Usage> usages) {
+    return usages.stream().anyMatch(TorchModuleModeShouldBeSetAfterLoadingCheck::isPassingModel);
+  }
+
+  /**
+   * @param usage a usage of the receiver
+   * @return {@code true} when the usage has any call expression among its ancestors; this covers passing the
+   *         model as an argument as well as calling a method on it
+   */
+  private static boolean isPassingModel(Usage usage) {
+    return TreeUtils.firstAncestorOfKind(usage.tree(), Tree.Kind.CALL_EXPR) != null;
+  }
+}
diff --git a/python-checks/src/main/resources/org/sonar/l10n/py/rules/python/S6982.html b/python-checks/src/main/resources/org/sonar/l10n/py/rules/python/S6982.html
@@ -0,0 +1,39 @@
+<p>This rule raises an issue when a PyTorch model state is loaded and <code>torch.nn.Module.eval()</code> or <code>torch.nn.Module.train()</code> is
+not called.</p>
+<h2>Why is this an issue?</h2>
+<p>When using PyTorch, it is common practice to save a model’s state to a <code>.pth</code> file and load it back later. This makes it possible,
+for example, to instantiate an untrained model and load learned parameters coming from another pre-trained model. Once the learned parameters are
+loaded into the model, it is important, before running inference, to clearly state the intention by calling the <code>torch.nn.Module.eval()</code>
+method to set the model in evaluation mode, or the <code>torch.nn.Module.train()</code> method to indicate that training will resume. Failing to call
+<code>torch.nn.Module.eval()</code> leaves the model in training mode, which may not be the intention.</p>
+<h2>How to fix it</h2>
+<p>Call the <code>torch.nn.Module.eval()</code> or <code>torch.nn.Module.train()</code> method on the model.</p>
+<h3>Code examples</h3>
+<h4>Noncompliant code example</h4>
+<pre data-diff-id="1" data-diff-type="noncompliant">
+import torch
+import torchvision.models as models
+
+model = models.vgg16()
+model.load_state_dict(torch.load('model_weights.pth')) # Noncompliant: model.train() or model.eval() was not called.
+</pre>
+<h4>Compliant solution</h4>
+<pre data-diff-id="1" data-diff-type="compliant">
+import torch
+import torchvision.models as models
+
+model = models.vgg16()
+model.load_state_dict(torch.load('model_weights.pth'))
+model.eval()
+</pre>
+<h2>Resources</h2>
+<h3>Documentation</h3>
+<ul>
+  <li> PyTorch Documentation - <a href="https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.eval">eval - reference</a>
+  </li>
+  <li> PyTorch Documentation - <a href="https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.train">train - reference</a>
+  </li>
+  <li> PyTorch Documentation - <a href="https://pytorch.org/docs/stable/notes/autograd.html#evaluation-mode-nn-module-eval">Autograd - Evaluation
+  Mode</a> </li>
+</ul>
+
diff --git a/python-checks/src/main/resources/org/sonar/l10n/py/rules/python/S6982.json b/python-checks/src/main/resources/org/sonar/l10n/py/rules/python/S6982.json
@@ -0,0 +1,25 @@
+{
+  "title": "\"model.eval()\" or \"model.train()\" should be called after loading the state of a PyTorch model",
+  "type": "CODE_SMELL",
+  "status": "ready",
+  "remediation": {
+    "func": "Constant\/Issue",
+    "constantCost": "1min"
+  },
+  "tags": [
+    "pytorch",
+    "machine-learning"
+  ],
+  "defaultSeverity": "Major",
+  "ruleSpecification": "RSPEC-6982",
+  "sqKey": "S6982",
+  "scope": "All",
+  "quickfix": "infeasible",
+  "code": {
+    "impacts": {
+      "MAINTAINABILITY": "LOW",
+      "RELIABILITY": "MEDIUM"
+    },
+    "attribute": "CLEAR"
+  }
+}
diff --git a/python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json b/python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_way_profile.json
@@ -247,6 +247,7 @@
     "S6973",
     "S6974",
     "S6979",
+    "S6982",
     "S6983",
     "S6984",
     "S6985"
diff --git a/python-checks/src/test/java/org/sonar/python/checks/TorchModuleModeShouldBeSetAfterLoadingCheckTest.java b/python-checks/src/test/java/org/sonar/python/checks/TorchModuleModeShouldBeSetAfterLoadingCheckTest.java
@@ -0,0 +1,30 @@
+/*
+ * SonarQube Python Plugin
+ * Copyright (C) 2011-2024 SonarSource SA
+ * mailto:info AT sonarsource DOT com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+package org.sonar.python.checks;
+
+import org.junit.jupiter.api.Test;
+import org.sonar.python.checks.utils.PythonCheckVerifier;
+
+class TorchModuleModeShouldBeSetAfterLoadingCheckTest {
+  @Test
+  void test() {
+    PythonCheckVerifier.verify("src/test/resources/checks/torchModuleModeShouldBeSetAfterLoadingCheck.py", new TorchModuleModeShouldBeSetAfterLoadingCheck());
+  }
+}
diff --git a/python-checks/src/test/resources/checks/torchModuleModeShouldBeSetAfterLoadingCheck.py b/python-checks/src/test/resources/checks/torchModuleModeShouldBeSetAfterLoadingCheck.py
@@ -0,0 +1,53 @@
+import torch
+import torchvision.models as models
+
+# `model` is never used after load_state_dict: the issue is expected on the callee (see the ^ marker).
+def noncompliant():
+    model = models.vgg16()
+    model.load_state_dict(torch.load('model_weights.pth')) # Noncompliant {{Set the module in training or evaluation mode.}}
+   #^^^^^^^^^^^^^^^^^^^^^
+    ...
+
+# Same as above with the model received as a parameter: no later usage of `model`.
+def noncompliant(model):
+    model.load_state_dict(torch.load('model_weights.pth')) # Noncompliant
+
+# `model` is not bound inside this function; there is still no later usage of it here.
+def noncompliant():
+    model.load_state_dict(torch.load('model_weights.pth')) # Noncompliant
+
+# The receiver is a call result rather than a plain name, so no usages can be looked up for it.
+def noncompliant():
+    get_model().load_state_dict(torch.load('model_weights.pth')) # Noncompliant
+
+# The torch.load result reaches load_state_dict through two intermediate variables.
+def noncompliant(model):
+    weights = torch.load('model_weights.pth')
+    weights2 = weights
+    model.load_state_dict(weights2) # Noncompliant
+
+# train() is called before the load, and the only later usage of `model` is a plain assignment (not a call).
+def noncompliant():
+    model = models.vgg16()
+    model.train()
+    model.load_state_dict(torch.load('model_weights.pth')) # Noncompliant
+    other_model = model
+
+# The argument is a name that does not resolve to a torch.load(...) call, so the call is not considered.
+def compliant(model):
+    weights = weights
+    model.load_state_dict(weights)
+
+# eval() is called on the model after the state is loaded.
+def compliant():
+    model1 = models.vgg16()
+    model1.load_state_dict(torch.load('model_weights.pth'))
+    model1.eval()
+
+# train() is called on the model after the load; the assignment in between does not matter.
+def compliant():
+    model2 = models.vgg16()
+    model2.load_state_dict(torch.load('model_weights.pth'))
+    other_model = model2
+    model2.train()
+
+# The model is handed to another function after the load.
+def compliant():
+    model3 = models.vgg16()
+    model3.load_state_dict(torch.load('model_weights.pth')) # OK: the model is passed as an argument to a function, where train() or eval() could be called, so no issue is raised.
+    foo(model3)
+
+# Neither argument below is a torch.load(...) call or a name resolving to one.
+def compliant():
+    # Ok since no torch.load() result is passed as an argument
+    model.load_state_dict(1 + 1)
+    model.load_state_dict((lambda x: x)())