Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions java/change-notes/2021-06-01-collection-flow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
lgtm,codescanning
* Data flow now tracks steps through collections and arrays more precisely.
That means that collection and array read steps are now matched up with
preceding store steps. This results in increased precision for all flow-based
queries, in particular most of the security queries.
5 changes: 3 additions & 2 deletions java/ql/src/semmle/code/java/dataflow/ExternalFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ private import FlowSummary
* ensuring that they are visible to the taint tracking / data flow library.
*/
private module Frameworks {
private import internal.ContainerFlow
private import semmle.code.java.frameworks.ApacheHttp
private import semmle.code.java.frameworks.apache.Lang
private import semmle.code.java.frameworks.guava.Guava
Expand Down Expand Up @@ -480,7 +481,7 @@ module CsvValidation {
not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and
msg = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
or
not type.regexpMatch("[a-zA-Z0-9_\\$]+") and
not type.regexpMatch("[a-zA-Z0-9_\\$<>]+") and
msg = "Dubious type \"" + type + "\" in " + pred + " model."
or
not name.regexpMatch("[a-zA-Z0-9_]*") and
Expand Down Expand Up @@ -561,7 +562,7 @@ private RefType interpretType(string namespace, string type, boolean subtypes) {
private string paramsStringPart(Callable c, int i) {
i = -1 and result = "("
or
exists(int n, string p | c.getParameterType(n).toString() = p |
exists(int n, string p | c.getParameterType(n).getErasure().toString() = p |
i = 2 * n and result = p
or
i = 2 * n - 1 and result = "," and n != 0
Expand Down
338 changes: 338 additions & 0 deletions java/ql/src/semmle/code/java/dataflow/internal/ContainerFlow.qll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ private import DataFlowImplCommon
private import DataFlowDispatch
private import semmle.code.java.controlflow.Guards
private import semmle.code.java.dataflow.SSA
private import ContainerFlow
private import FlowSummaryImpl as FlowSummaryImpl
import DataFlowNodes::Private

Expand Down Expand Up @@ -137,13 +138,15 @@ class MapValueContent extends Content, TMapValueContent {
* Thus, `node2` references an object with a field `f` that contains the
* value of `node1`.
*/
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
predicate storeStep(Node node1, Content f, Node node2) {
exists(FieldAccess fa |
instanceFieldAssign(node1.asExpr(), fa) and
node2.getPreUpdateNode() = getFieldQualifier(fa) and
node2.(PostUpdateNode).getPreUpdateNode() = getFieldQualifier(fa) and
f.(FieldContent).getField() = fa.getField()
)
or
f instanceof ArrayContent and arrayStoreStep(node1, node2)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, f, node2)
}

Expand Down Expand Up @@ -171,6 +174,10 @@ predicate readStep(Node node1, Content f, Node node2) {
node2.asExpr() = get
)
or
f instanceof ArrayContent and arrayReadStep(node1, node2, _)
or
f instanceof CollectionContent and collectionReadStep(node1, node2)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(node1, f, node2)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ predicate simpleLocalFlowStep(Node node1, Node node2) {
or
node2.asExpr().(AssignExpr).getSource() = node1.asExpr()
or
node2.asExpr().(ArrayCreationExpr).getInit() = node1.asExpr()
or
exists(MethodAccess ma, ValuePreservingMethod m, int argNo |
ma.getCallee().getSourceDeclaration() = m and m.returnsValue(argNo)
|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ DataFlowType getContentType(Content c) {
or
c instanceof ArrayContent and
result instanceof TypeObject
or
c instanceof MapKeyContent and
result instanceof TypeObject
or
c instanceof MapValueContent and
result instanceof TypeObject
}

/** Gets the return type of kind `rk` for callable `c`. */
Expand Down
119 changes: 95 additions & 24 deletions java/ql/src/semmle/code/java/dataflow/internal/TaintTrackingUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,15 @@ private module Cached {
localAdditionalTaintUpdateStep(src.asExpr(),
sink.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr())
or
exists(Argument arg |
src.asExpr() = arg and
arg.isVararg() and
sink.(DataFlow::ImplicitVarargsArray).getCall() = arg.getCall()
exists(Content f |
readStep(src, f, sink) and
not sink.getTypeBound() instanceof PrimitiveType and
not sink.getTypeBound() instanceof BoxedType and
not sink.getTypeBound() instanceof NumberType
|
f instanceof ArrayContent or
f instanceof CollectionContent or
f instanceof MapValueContent
)
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(src, sink, false)
Expand Down Expand Up @@ -93,6 +98,92 @@ private module Cached {

import Cached

/**
* These configurations add a number of configuration-dependent additional taint
* steps to all taint configurations. For each sink or additional step provided
* by a given configuration the types are inspected to find those implicit
* collection or array read steps that might be required at the sink or step
* input. The corresponding store steps are then added as additional taint steps
* to provide backwards-compatible taint flow to such sinks and steps.
*
* This is a temporary measure until support is added for such sinks that
* require implicit read steps.
*/
private module StoreTaintSteps {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document this for the benefit of external readers who ought to know this is a stop-gap / temporary measure (and what it intends to do).

private import semmle.code.java.dataflow.TaintTracking
private import semmle.code.java.dataflow.TaintTracking2

private class StoreTaintConfig extends TaintTracking::Configuration {
StoreTaintConfig() { this instanceof TaintTracking::Configuration or none() }
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is somewhat unusual, could you explain what's going on here? How does this not cause collisions with other implementations of TaintTracking::Configuration actually used in the query?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is indeed unusual 😉 And yes, it collides with every other configuration. It's a hack to globally modify all other configurations with code that is itself configuration-dependent.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why use this vs implementing AdditionalTaintStep? Isn't the API for isAdditionalTaintStep exactly the same and doesn't creating implementations of AdditionalTaintStep have exactly the same effect?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. The Configuration::isAdditionalTaintStep predicate is configuration-specific, whereas AdditionalTaintStep is configuration-independent (i.e. global and applies to all configs).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would this collection data flow only be applied to TaintTracking and TaintTracking2? Why not all of them?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are only two copies TaintTracking at the moment, so this should be all of them.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Besides, this is a temporary measure, since it is quite costly in terms of performance.


override predicate isSource(DataFlow::Node n) { none() }

override predicate isSink(DataFlow::Node n) { none() }

private predicate needsTaintStore(RefType container, Type elem, Content f) {
exists(DataFlow::Node arg |
(isSink(arg) or isAdditionalTaintStep(arg, _)) and
(arg.asExpr() instanceof Argument or arg instanceof ArgumentNode) and
arg.getType() = container
or
needsTaintStore(_, container, _)
|
container.(Array).getComponentType() = elem and
f instanceof ArrayContent
or
container.(CollectionType).getElementType() = elem and
f instanceof CollectionContent
or
container.(MapType).getValueType() = elem and
f instanceof MapValueContent
)
}

override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(Content f, Type elem |
storeStep(node1, f, node2) and
needsTaintStore(_, elem, f) and
not exists(Type srctyp | srctyp = node1.getTypeBound() | not compatibleTypes(srctyp, elem))
)
}
}

private class StoreTaintConfig2 extends TaintTracking2::Configuration {
StoreTaintConfig2() { this instanceof TaintTracking2::Configuration or none() }

override predicate isSource(DataFlow::Node n) { none() }

override predicate isSink(DataFlow::Node n) { none() }

private predicate needsTaintStore(RefType container, Type elem, Content f) {
exists(DataFlow::Node arg |
(isSink(arg) or isAdditionalTaintStep(arg, _)) and
(arg.asExpr() instanceof Argument or arg instanceof ArgumentNode) and
arg.getType() = container
or
needsTaintStore(_, container, _)
|
container.(Array).getComponentType() = elem and
f instanceof ArrayContent
or
container.(CollectionType).getElementType() = elem and
f instanceof CollectionContent
or
container.(MapType).getValueType() = elem and
f instanceof MapValueContent
)
}

override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
exists(Content f, Type elem |
storeStep(node1, f, node2) and
needsTaintStore(_, elem, f) and
not exists(Type srctyp | srctyp = node1.getTypeBound() | not compatibleTypes(srctyp, elem))
)
}
}
}

/**
* Holds if taint can flow in one local step from `src` to `sink` excluding
* local data flow steps. That is, `src` and `sink` are likely to represent
Expand All @@ -103,22 +194,8 @@ private predicate localAdditionalTaintExprStep(Expr src, Expr sink) {
or
sink.(AssignAddExpr).getSource() = src and sink.getType() instanceof TypeString
or
sink.(ArrayCreationExpr).getInit() = src
or
sink.(ArrayInit).getAnInit() = src
or
sink.(ArrayAccess).getArray() = src
or
sink.(LogicExpr).getAnOperand() = src
or
exists(EnhancedForStmt for, SsaExplicitUpdate v |
for.getExpr() = src and
v.getDefiningExpr() = for.getVariable() and
v.getAFirstUse() = sink
)
or
containerReturnValueStep(src, sink)
or
constructorStep(src, sink)
or
qualifierToMethodStep(src, sink)
Expand All @@ -141,12 +218,6 @@ private predicate localAdditionalTaintExprStep(Expr src, Expr sink) {
* This is restricted to cases where the step updates the value of `sink`.
*/
private predicate localAdditionalTaintUpdateStep(Expr src, Expr sink) {
exists(Assignment assign | assign.getSource() = src |
sink = assign.getDest().(ArrayAccess).getArray()
)
or
containerUpdateStep(src, sink)
or
qualifierToArgumentStep(src, sink)
or
argToArgStep(src, sink)
Expand Down
19 changes: 11 additions & 8 deletions java/ql/src/semmle/code/java/frameworks/ApacheHttp.qll
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,17 @@ private class ApacheHttpFlowStep extends SummaryModelCsv {
"org.apache.http.util;EncodingUtils;true;getAsciiString;;;Argument[0];ReturnValue;taint",
"org.apache.http.util;EncodingUtils;true;getBytes;(String,String);;Argument[0];ReturnValue;taint",
"org.apache.http.util;EncodingUtils;true;getString;;;Argument[0];ReturnValue;taint",
"org.apache.http.util;Args;true;containsNoBlanks;(T,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;notNull;(T,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;notEmpty;(T,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;notBlank;(T,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;containsNoBlanks;(T,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notNull;(T,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notEmpty;(T,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notBlank;(T,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;containsNoBlanks;(CharSequence,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;notNull;(Object,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;notEmpty;(CharSequence,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;notEmpty;(Collection,String);;Argument[0];ReturnValue;value",
"org.apache.http.util;Args;true;notBlank;(CharSequence,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;containsNoBlanks;(CharSequence,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notNull;(Object,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notEmpty;(Collection,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notEmpty;(CharSequence,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notEmpty;(Object,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.util;Args;true;notBlank;(CharSequence,String);;Argument[0];ReturnValue;value",
"org.apache.hc.core5.http.io.entity;HttpEntities;true;create;;;Argument[0];ReturnValue;taint",
"org.apache.hc.core5.http.io.entity;HttpEntities;true;createGzipped;;;Argument[0];ReturnValue;taint",
"org.apache.hc.core5.http.io.entity;HttpEntities;true;createUrlEncoded;;;Argument[0];ReturnValue;taint",
Expand Down
Loading