Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions python/ql/src/Variables/LoopVariableCapture.ql

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
* @name Loop variable capture
* @description Capture of a loop variable is not the same as capturing the value of a loop variable, and may be erroneous.
Comment thread
joefarebrother marked this conversation as resolved.
Outdated
* @kind path-problem
* @tags correctness
* @problem.severity error
* @sub-severity low
* @precision high
* @id py/loop-variable-capture
*/

import python
import LoopVariableCaptureQuery
import semmle.python.dataflow.new.DataFlow
Comment thread Fixed
import EscapingCaptureFlow::PathGraph

from
CallableExpr capturing, AstNode loop, Variable var, string descr,
EscapingCaptureFlow::PathNode source, EscapingCaptureFlow::PathNode sink
where
escapingCapture(capturing, loop, var, source, sink) and
if capturing instanceof Lambda then descr = "lambda" else descr = "function"
select capturing, source, sink,
"This " + descr + " captures the loop variable $@, and may escape the loop by being stored at $@.",
loop, var.getId(), sink, "this location"

Check warning

Code scanning / CodeQL

Alert message style violation Warning

Try to more descriptive phrase instead of "this location" if possible.
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/** Definitions for reasoning about loop variable capture issues. */

import python
import semmle.python.dataflow.new.DataFlow

/** A looping construct. */
abstract class Loop extends AstNode {
/**
* Gets a loop variable of this loop.
* For example, `x` and `y` in `for x,y in pairs: print(x+y)`
*/
abstract Variable getALoopVariable();
}

/** A `for` loop. */
private class ForLoop extends Loop, For {
override Variable getALoopVariable() {
this.getTarget() = result.getAnAccess().getParentNode*() and
result.getScope() = this.getScope()
}
}

/** Holds if the callable `capturing` captures the variable `var` from the loop `loop`. */
predicate capturesLoopVariable(CallableExpr capturing, Loop loop, Variable var) {
var.getAnAccess().getScope() = capturing.getInnerScope() and
capturing.getParentNode+() = loop and
var = loop.getALoopVariable()
}

/** Dataflow configuration for reasoning about callables that capture a loop variable and then may escape from the loop. */
module EscapingCaptureFlowConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { capturesLoopVariable(node.asExpr(), _, _) }

predicate isSink(DataFlow::Node node) {
// Stored in a dict/list.
exists(Assign assign, Subscript sub |
sub = assign.getATarget() and node.asExpr() = assign.getValue()
)
or
// Stored in a list.
exists(DataFlow::MethodCallNode mc | mc.calls(_, "append") and node = mc.getArg(0))
or
// Used in a yield statement, likely included in a collection.
// The element of comprehension expressions desugar to involve a yield statement internally.
exists(Yield y | node.asExpr() = y.getValue())
// Checks for storing in a field leads to false positives, so are omitted.
}

predicate isBarrierOut(DataFlow::Node node) { isSink(node) }

predicate isBarrier(DataFlow::Node node) {
// Incorrect virtual dispatch to __call__ methods is a source of FPs.
exists(Function call |
call.getName() = "__call__" and
call.getArg(0) = node.(DataFlow::ParameterNode).getParameter()
)
}

predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet cs) {
isSink(node) and
(
cs instanceof DataFlow::TupleElementContent or
cs instanceof DataFlow::ListElementContent or
cs instanceof DataFlow::SetElementContent or
cs instanceof DataFlow::DictionaryElementAnyContent
)
}
}

/** Dataflow for reasoning about callables that capture a loop variable and then escape from the loop. */
module EscapingCaptureFlow = DataFlow::Global<EscapingCaptureFlowConfig>;

/** Holds if `capturing` is a callable that captures the variable `var` of the loop `loop`, and then may escape the loop via a flow path from `source` to `sink`. */
predicate escapingCapture(
CallableExpr capturing, Loop loop, Variable var, EscapingCaptureFlow::PathNode source,
EscapingCaptureFlow::PathNode sink
) {
capturesLoopVariable(capturing, loop, var) and
capturing = source.getNode().asExpr() and
EscapingCaptureFlow::flowPath(source, sink)
}

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import python
import Variables.LoopVariableCapture.LoopVariableCaptureQuery
import utils.test.InlineExpectationsTest

module MethodArgTest implements TestSig {
string getARelevantTag() { result = ["capturedVar"] }
Comment thread Fixed

predicate hasActualResult(Location location, string element, string tag, string value) {
exists(CallableExpr capturing, AstNode loop, Variable var |
Comment thread Fixed
escapingCapture(capturing, loop, var, _, _) and
element = capturing.toString() and
location = capturing.getLocation() and
tag = "capturedVar" and
value = var.getId()
)
}
}

import MakeTest<MethodArgTest>
30 changes: 19 additions & 11 deletions python/ql/test/query-tests/Variables/capture/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
def bad1():
results = []
for x in range(10):
def inner():
def inner(): # $capturedVar=x
return x
results.append(inner)
results.append(inner)
return results

a = [lambda: i for i in range(1, 4)]
a = [lambda: i for i in range(1, 4)] # $capturedVar=i
for f in a:
print(f())

Expand All @@ -18,7 +18,14 @@ def good1():
for y in range(10):
def inner(y=y):
return y
results.append(inner)
results.append(inner)
return results

# OK: Using default argument.
def good2():
results = []
for y in range(10):
results.append(lambda y=y: y)
return results

#Factory function
Expand All @@ -39,29 +46,30 @@ def inner():
result += inner()
return result

b = [lambda: i for i in range(1, 4) for j in range(1,5)]
c = [lambda: j for i in range(1, 4) for j in range(1,5)]
b = [lambda: i for i in range(1, 4) for j in range(1,5)] # $capturedVar=i
c = [lambda: j for i in range(1, 4) for j in range(1,5)] # $capturedVar=j

s = {lambda: i for i in range(1, 4)}
s = {lambda: i for i in range(1, 4)} # $capturedVar=i
for f in s:
print(f())

d = {i:lambda: i for i in range(1, 4)}
d = {i:lambda: i for i in range(1, 4)} # $capturedVar=i
for k, f in d.items():
print(k, f())

#Generator expressions are sometimes OK, if they evaluate the iteration
#When the captured variable is used.
#So technically this is a false positive, but it is extremely fragile
#code, so I (Mark) think it is fine to report it as a violation.
g = (lambda: i for i in range(1, 4))
g = (lambda: i for i in range(1, 4)) # $capturedVar=i
for f in g:
print(f())

#But not if evaluated eagerly
l = list(lambda: i for i in range(1, 4))
l = list(lambda: i for i in range(1, 4)) # $capturedVar=i
for f in l:
print(f())

# This result is MISSING since the lambda is not detected to escape the loop
def odasa4860(asset_ids):
return dict((asset_id, filter(lambda c : c.asset_id == asset_id, xxx)) for asset_id in asset_ids)
return dict((asset_id, filter(lambda c : c.asset_id == asset_id, xxx)) for asset_id in asset_ids) # $MISSING: capturedVar=asset_id