From 6ed2eb1478485d568c112ba4b4e200d0b6f09532 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 10 Feb 2025 12:43:07 +0000 Subject: [PATCH] Ruby: Update internal dataflow files. --- .../dataflow/internal/DataFlowDispatch.qll | 175 +-------- .../dataflow/internal/DataFlowPrivate.qll | 355 +++++++++++++++++- 2 files changed, 358 insertions(+), 172 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll index 268c289259e8..e7c05eaa065c 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll @@ -237,7 +237,7 @@ class NormalCall extends DataFlowCall, TNormalCall { * need to track the `View` instance (2) into the receiver of the adjusted method * call, in order to figure out that the call target is in fact `view.html.erb`. */ -private module ViewComponentRenderModeling { +module ViewComponentRenderModeling { private import codeql.ruby.frameworks.ViewComponent private class RenderMethod extends SummarizedCallable, LibraryCallableToIncludeInTypeTracking { @@ -333,7 +333,7 @@ private predicate selfInModule(SelfVariable self, Module m) { /** Holds if `self` belongs to method `method` inside module `m`. */ pragma[nomagic] -private predicate selfInMethod(SelfVariable self, MethodBase method, Module m) { +predicate selfInMethod(SelfVariable self, MethodBase method, Module m) { exists(ModuleBase encl | method = self.getDeclaringScope() and encl = method.getEnclosingModule() and @@ -343,67 +343,6 @@ private predicate selfInMethod(SelfVariable self, MethodBase method, Module m) { ) } -/** Holds if `self` belongs to the top-level. */ -pragma[nomagic] -private predicate selfInToplevel(SelfVariable self, Module m) { - ViewComponentRenderModeling::selfInErbToplevel(self, m) - or - not ViewComponentRenderModeling::selfInErbToplevel(self, _) and - self.getDeclaringScope() instanceof Toplevel and - m = Module::TResolved("Object") -} - -/** - * Holds if SSA definition `def` belongs to a variable introduced via pattern - * matching on type `m`. For example, in - * - * ```rb - * case object - * in C => c then c.foo - * end - * ``` - * - * the SSA definition for `c` is introduced by matching on `C`. - */ -private predicate asModulePattern(SsaDefinitionExtNode def, Module m) { - exists(AsPattern ap | - m = Module::resolveConstantReadAccess(ap.getPattern()) and - def.getDefinitionExt().(Ssa::WriteDefinition).getWriteAccess().getAstNode() = - ap.getVariableAccess() - ) -} - -/** - * Holds if `read1` and `read2` are adjacent reads of SSA definition `def`, - * and `read2` is checked to have type `m`. For example, in - * - * ```rb - * case object - * when C then object.foo - * end - * ``` - * - * the two reads of `object` are adjacent, and the second is checked to have type `C`. - */ -private predicate hasAdjacentTypeCheckedReads( - Ssa::Definition def, CfgNodes::ExprCfgNode read1, CfgNodes::ExprCfgNode read2, Module m -) { - exists( - CfgNodes::ExprCfgNode pattern, ConditionBlock cb, CfgNodes::ExprNodes::CaseExprCfgNode case - | - m = Module::resolveConstantReadAccess(pattern.getExpr()) and - cb.getLastNode() = pattern and - cb.controls(read2.getBasicBlock(), - any(SuccessorTypes::MatchingSuccessor match | match.getValue() = true)) and - def.hasAdjacentReads(read1, read2) and - case.getValue() = read1 - | - pattern = case.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_) - or - pattern = case.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern() - ) -} - /** Holds if `new` is a user-defined `self.new` method. */ predicate isUserDefinedNew(SingletonMethod new) { exists(Expr object | singletonMethod(new, "new", object) | @@ -638,7 +577,7 @@ private predicate hasUserDefinedNew(Module m) { * `self.new` on `m`. */ pragma[nomagic] -private predicate isStandardNewCall(RelevantCall new, Module m, boolean exact) { +predicate isStandardNewCall(RelevantCall new, Module m, boolean exact) { exists(DataFlow::LocalSourceNode sourceNode | flowsToMethodCallReceiver(TNormalCall(new), sourceNode, "new") and // `m` should not have a user-defined `self.new` method @@ -667,106 +606,11 @@ private predicate localFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, } private module TrackInstanceInput implements CallGraphConstruction::InputSig { - pragma[nomagic] - private predicate isInstanceNoCall(DataFlow::Node n, Module tp, boolean exact) { - n.asExpr().getExpr() instanceof NilLiteral and - tp = Module::TResolved("NilClass") and - exact = true - or - n.asExpr().getExpr().(BooleanLiteral).isFalse() and - tp = Module::TResolved("FalseClass") and - exact = true - or - n.asExpr().getExpr().(BooleanLiteral).isTrue() and - tp = Module::TResolved("TrueClass") and - exact = true - or - n.asExpr().getExpr() instanceof IntegerLiteral and - tp = Module::TResolved("Integer") and - exact = true - or - n.asExpr().getExpr() instanceof FloatLiteral and - tp = Module::TResolved("Float") and - exact = true - or - n.asExpr().getExpr() instanceof RationalLiteral and - tp = Module::TResolved("Rational") and - exact = true - or - n.asExpr().getExpr() instanceof ComplexLiteral and - tp = Module::TResolved("Complex") and - exact = true - or - n.asExpr().getExpr() instanceof StringlikeLiteral and - tp = Module::TResolved("String") and - exact = true - or - n.asExpr() instanceof CfgNodes::ExprNodes::ArrayLiteralCfgNode and - tp = Module::TResolved("Array") and - exact = true - or - n.asExpr() instanceof CfgNodes::ExprNodes::HashLiteralCfgNode and - tp = Module::TResolved("Hash") and - exact = true - or - n.asExpr().getExpr() instanceof MethodBase and - tp = Module::TResolved("Symbol") and - exact = true - or - n.asParameter() instanceof BlockParameter and - tp = Module::TResolved("Proc") and - exact = true - or - n.asExpr().getExpr() instanceof Lambda and - tp = Module::TResolved("Proc") and - exact = true - or - // `self` reference in method or top-level (but not in module or singleton method, - // where instance methods cannot be called; only singleton methods) - n = - any(SelfLocalSourceNode self | - exists(MethodBase m | - selfInMethod(self.getVariable(), m, tp) and - not m instanceof SingletonMethod and - if m.getEnclosingModule() instanceof Toplevel then exact = true else exact = false - ) - or - selfInToplevel(self.getVariable(), tp) and - exact = true - ) - or - // `in C => c then c.foo` - asModulePattern(n, tp) and - exact = false - or - // `case object when C then object.foo` - hasAdjacentTypeCheckedReads(_, _, n.asExpr(), tp) and - exact = false - } - - pragma[nomagic] - private predicate isInstanceCall(DataFlow::Node n, Module tp, boolean exact) { - isStandardNewCall(n.asExpr(), tp, exact) - } - - /** Holds if `n` is an instance of type `tp`. */ - pragma[inline] - private predicate isInstance(DataFlow::Node n, Module tp, boolean exact) { - isInstanceNoCall(n, tp, exact) - or - isInstanceCall(n, tp, exact) - } - - pragma[nomagic] - private predicate hasAdjacentTypeCheckedReads(DataFlow::Node node) { - hasAdjacentTypeCheckedReads(_, _, node.asExpr(), _) - } - newtype State = additional MkState(Module m, Boolean exact) predicate start(DataFlow::Node start, State state) { exists(Module tp, boolean exact | state = MkState(tp, exact) | - isInstance(start, tp, exact) + TypeInference::hasType(start, tp, exact) or exists(Module m | (if m.isClass() then tp = Module::TResolved("Class") else tp = Module::TResolved("Module")) and @@ -784,6 +628,11 @@ private module TrackInstanceInput implements CallGraphConstruction::InputSig { ) } + pragma[nomagic] + private predicate hasAdjacentTypeCheckedRead(DataFlow::Node node) { + TypeInference::hasAdjacentTypeCheckedRead(node.asExpr(), _) + } + pragma[nomagic] predicate stepNoCall(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, StepSummary summary) { smallStepNoCall(nodeFrom, nodeTo, summary) @@ -791,8 +640,8 @@ private module TrackInstanceInput implements CallGraphConstruction::InputSig { // We exclude steps into type checked variables. For those, we instead rely on the // type being checked against localFlowStep(nodeFrom, nodeTo, summary) and - not hasAdjacentTypeCheckedReads(nodeTo) and - not asModulePattern(nodeTo, _) + not hasAdjacentTypeCheckedRead(nodeTo) and + not TypeInference::asModulePattern(nodeTo.(SsaDefinitionExtNode).getDefinitionExt(), _) } predicate stepCall(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, StepSummary summary) { @@ -1577,4 +1426,4 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos.isAnyNamed() and apos.isKeyword(_) or apos.isAnyNamed() and ppos.isKeyword(_) -} +} \ No newline at end of file diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll index 24e232a22c2c..e701388b3e7d 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll @@ -701,7 +701,9 @@ private module Cached { cached newtype TDataFlowType = + TModuleDataFlowType(Module m) or TLambdaDataFlowType(Callable c) { c = any(LambdaSelfReferenceNode n).getCallable() } or + TCollectionType() or TUnknownDataFlowType() } @@ -1890,21 +1892,83 @@ predicate expectsContent(Node n, ContentSet c) { } class DataFlowType extends TDataFlowType { - string toString() { result = "" } + string toString() { + exists(Module m | + this = TModuleDataFlowType(m) and + result = m.toString() + ) + or + this = TLambdaDataFlowType(_) and result = "[lambda]" + or + this = TCollectionType() and result = "[collection]" + or + this = TUnknownDataFlowType() and + result = "" + } + + predicate isUnknown() { this = TUnknownDataFlowType() } + + Location getLocation() { + exists(Module m | + this = TModuleDataFlowType(m) and + result = m.getLocation() + ) + or + exists(Callable c | this = TLambdaDataFlowType(c) and result = c.getLocation()) + } +} + +pragma[nomagic] +private predicate isProcClass(DataFlowType t) { + t = TModuleDataFlowType(any(TypeInference::ProcClass m)) +} + +pragma[nomagic] +private predicate isArrayClass(DataFlowType t) { + t = TModuleDataFlowType(any(TypeInference::ArrayClass m).getADescendent()) +} + +pragma[nomagic] +private predicate isHashClass(DataFlowType t) { + t = TModuleDataFlowType(any(TypeInference::HashClass m).getADescendent()) } +private predicate isCollectionClass(DataFlowType t) { isArrayClass(t) or isHashClass(t) } + predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { - t1 != TUnknownDataFlowType() and - t2 = TUnknownDataFlowType() + not t1.isUnknown() and + t2.isUnknown() + or + exists(Module m1, Module m2 | + t1 = TModuleDataFlowType(m1) and + t2 = TModuleDataFlowType(m2) and + m1.getAnImmediateAncestor+() = m2 + ) + or + t1 instanceof TLambdaDataFlowType and + isProcClass(t2) } -private predicate mustHaveLambdaType(ExprNode n, Callable c) { +private predicate mustHaveLambdaType(Node n, Callable c) { exists(VariableCapture::ClosureExpr ce, CfgNodes::ExprCfgNode e | e = n.asExpr() and ce.hasBody(c) | e = ce or ce.hasAliasedAccess(e) ) + or + n.(CaptureNode).getSynthesizedCaptureNode().isInstanceAccess() and + c = n.(CaptureNode).getSynthesizedCaptureNode().getEnclosingCallable() +} + +private predicate mustHaveCollectionType(Node n, DataFlowType t) { + exists(ContentSet c | readStep(n, c, _) or storeStep(_, c, n) or expectsContent(n, c) | + c.isElement() and + t = TCollectionType() + ) and + not n instanceof SynthHashSplatOrSplatArgumentNode and + not n instanceof SynthHashSplatParameterNode and + not n instanceof SynthSplatParameterNode } predicate localMustFlowStep(Node node1, Node node2) { @@ -1937,15 +2001,40 @@ DataFlowType getNodeType(Node n) { result = TLambdaDataFlowType(c) ) or + mustHaveCollectionType(n, result) + or not n instanceof LambdaSelfReferenceNode and not mustHaveLambdaType(n, _) and - result = TUnknownDataFlowType() + not mustHaveCollectionType(n, _) and + ( + TypeInference::hasModuleType(n, result) + or + not TypeInference::hasModuleType(n, _) and + result.isUnknown() + ) } -pragma[inline] +pragma[nomagic] private predicate compatibleTypesNonSymRefl(DataFlowType t1, DataFlowType t2) { - t1 != TUnknownDataFlowType() and - t2 = TUnknownDataFlowType() + not t1.isUnknown() and + t2.isUnknown() + or + t1 instanceof TLambdaDataFlowType and + isProcClass(t2) + or + t1 instanceof TCollectionType and + isCollectionClass(t2) +} + +pragma[nomagic] +private predicate compatibleModuleTypes(TModuleDataFlowType t1, TModuleDataFlowType t2) { + exists(Module m1, Module m2, Module m3 | + t1 = TModuleDataFlowType(m1) and + t2 = TModuleDataFlowType(m2) + | + m3.getAnAncestor() = m1 and + m3.getAnAncestor() = m2 + ) } /** @@ -1958,6 +2047,8 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { compatibleTypesNonSymRefl(t1, t2) or compatibleTypesNonSymRefl(t2, t1) + or + compatibleModuleTypes(t1, t2) } abstract class PostUpdateNodeImpl extends Node { @@ -2017,7 +2108,11 @@ private import PostUpdateNodes /** A node that performs a type cast. */ class CastNode extends Node { - CastNode() { none() } + CastNode() { + TypeInference::hasAdjacentTypeCheckedRead(this.asExpr(), _) + or + TypeInference::asModulePattern(this.(SsaDefinitionNode).getDefinition(), _) + } } /** @@ -2217,3 +2312,245 @@ class AdditionalJumpStep extends Unit { */ abstract predicate step(Node pred, Node succ); } + +/** Provides logic for assigning types to data flow nodes. */ +module TypeInference { + private import codeql.ruby.ast.internal.Module + private import DataFlowDispatch + + /** The built-in `Proc` class. */ + class ProcClass extends Module { + ProcClass() { this = TResolved("Proc") } + } + + /** The built-in `Array` class. */ + class ArrayClass extends Module { + ArrayClass() { this = TResolved("Array") } + } + + /** The built-in `Hash` class. */ + class HashClass extends Module { + HashClass() { this = TResolved("Hash") } + } + + /** The built-in `String` class. */ + class StringClass extends Module { + StringClass() { this = TResolved("String") } + } + + /** Holds if `self` belongs to the top-level. */ + pragma[nomagic] + private predicate selfInToplevel(SelfVariable self, Module m) { + ViewComponentRenderModeling::selfInErbToplevel(self, m) + or + not ViewComponentRenderModeling::selfInErbToplevel(self, _) and + self.getDeclaringScope() instanceof Toplevel and + m = TResolved("Object") + } + + /** + * Holds if SSA definition `def` belongs to a variable introduced via pattern + * matching on type `m`. For example, in + * + * ```rb + * case object + * in C => c then c.foo + * end + * ``` + * + * the SSA definition for `c` is introduced by matching on `C`. + */ + predicate asModulePattern(Ssa::WriteDefinition def, Module m) { + exists(AsPattern ap | + m = resolveConstantReadAccess(ap.getPattern()) and + def.getWriteAccess().getAstNode() = ap.getVariableAccess() + ) + } + + /** + * Holds if `caseRead` and `read` are reads of SSA definition `def`, + * and `read` is checked to have type `m`. For example, in + * + * ```rb + * case object + * when C then object.foo + * end + * ``` + * + * the second read of `object` is known to have type `C`. + */ + private predicate hasTypeCheckedRead( + Ssa::Definition def, CfgNodes::ExprCfgNode caseRead, CfgNodes::ExprCfgNode read, Module m + ) { + exists( + CfgNodes::ExprCfgNode pattern, ConditionBlock cb, CfgNodes::ExprNodes::CaseExprCfgNode case + | + m = resolveConstantReadAccess(pattern.getExpr()) and + cb.getLastNode() = pattern and + cb.controls(read.getBasicBlock(), + any(SuccessorTypes::MatchingSuccessor match | match.getValue() = true)) and + caseRead = def.getARead() and + read = def.getARead() and + case.getValue() = caseRead + | + pattern = case.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_) + or + pattern = case.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern() + ) + } + + predicate hasAdjacentTypeCheckedRead(CfgNodes::ExprCfgNode read, Module m) { + exists(Ssa::Definition def, CfgNodes::ExprCfgNode caseRead | + hasTypeCheckedRead(def, caseRead, read, m) and + def.hasAdjacentReads(caseRead, read) + ) + } + + private predicate isTypeCheckedRead(CfgNodes::ExprCfgNode read, Module m) { + exists(Ssa::Definition def | + hasTypeCheckedRead(def, _, read, m) and + // could in principle be checked against a new type + not exists(CfgNodes::ExprCfgNode innerCaseRead | + hasTypeCheckedRead(def, _, innerCaseRead, m) and + hasTypeCheckedRead(def, innerCaseRead, read, _) + ) + ) + } + + pragma[nomagic] + private predicate selfInMethodOrToplevelHasType(SelfVariable self, Module tp, boolean exact) { + exists(MethodBase m | + selfInMethod(self, m, tp) and + not m instanceof SingletonMethod and + if m.getEnclosingModule() instanceof Toplevel then exact = true else exact = false + ) + or + selfInToplevel(self, tp) and + exact = true + } + + pragma[nomagic] + private predicate parameterNodeHasType(ParameterNodeImpl p, Module tp, boolean exact) { + exists(ParameterPosition pos | + p.isParameterOf(_, pos) and + exact = true + | + (pos.isSplat(_) or pos.isSynthSplat(_)) and + tp instanceof ArrayClass + or + (pos.isHashSplat() or pos.isSynthHashSplat()) and + tp instanceof HashClass + ) + or + selfInMethodOrToplevelHasType(p.(SelfParameterNodeImpl).getSelfVariable(), tp, exact) + } + + pragma[nomagic] + private predicate ssaDefHasType(SsaDefinitionExtNode def, Module tp, boolean exact) { + exists(ParameterNodeImpl p | + parameterNodeHasType(p, tp, exact) and + p = SsaFlow::toParameterNodeImpl(def) + ) + or + selfInMethodOrToplevelHasType(def.getVariable(), tp, exact) + or + asModulePattern(def.getDefinitionExt(), tp) and + exact = false + } + + pragma[nomagic] + private predicate hasTypeNoCall(Node n, Module tp, boolean exact) { + n.asExpr().getExpr() instanceof NilLiteral and + tp = TResolved("NilClass") and + exact = true + or + n.asExpr().getExpr().(BooleanLiteral).isFalse() and + tp = TResolved("FalseClass") and + exact = true + or + n.asExpr().getExpr().(BooleanLiteral).isTrue() and + tp = TResolved("TrueClass") and + exact = true + or + n.asExpr().getExpr() instanceof IntegerLiteral and + tp = TResolved("Integer") and + exact = true + or + n.asExpr().getExpr() instanceof FloatLiteral and + tp = TResolved("Float") and + exact = true + or + n.asExpr().getExpr() instanceof RationalLiteral and + tp = TResolved("Rational") and + exact = true + or + n.asExpr().getExpr() instanceof ComplexLiteral and + tp = TResolved("Complex") and + exact = true + or + n.asExpr().getExpr() instanceof StringlikeLiteral and + tp instanceof StringClass and + exact = true + or + ( + n.asExpr() instanceof CfgNodes::ExprNodes::ArrayLiteralCfgNode or + n instanceof SynthSplatArgumentNode + ) and + tp instanceof ArrayClass and + exact = true + or + ( + n.asExpr() instanceof CfgNodes::ExprNodes::HashLiteralCfgNode + or + n instanceof SynthHashSplatArgumentNode + ) and + tp instanceof HashClass and + exact = true + or + n.asExpr().getExpr() instanceof MethodBase and + tp = TResolved("Symbol") and + exact = true + or + ( + n.asParameter() instanceof BlockParameter + or + n instanceof BlockParameterNode + or + n.asExpr().getExpr() instanceof Lambda + ) and + tp instanceof ProcClass and + exact = true + or + parameterNodeHasType(n, tp, exact) + or + exists(SsaDefinitionExtNode def | ssaDefHasType(def, tp, exact) | + n = def or + n.asExpr() = + any(CfgNodes::ExprCfgNode read | + read = def.getDefinitionExt().getARead() and + not isTypeCheckedRead(read, _) // could in principle be checked against a new type + ) + ) + or + // `case object when C then object.foo` + isTypeCheckedRead(n.asExpr(), tp) and + exact = false + } + + pragma[nomagic] + private predicate hasTypeCall(Node n, Module tp, boolean exact) { + isStandardNewCall(n.asExpr(), tp, exact) + } + + pragma[inline] + predicate hasType(Node n, Module tp, boolean exact) { + hasTypeNoCall(n, tp, exact) + or + hasTypeCall(n, tp, exact) + } + + pragma[nomagic] + predicate hasModuleType(Node n, DataFlowType t) { + exists(Module tp | t = TModuleDataFlowType(tp) | hasType(n, tp, _)) + } +} \ No newline at end of file