/* An experimental state machine, for tracking "taint": unsanitized uses
of data potentially under an attacker's control.
Copyright (C) 2019-2020 Free Software Foundation, Inc.
Contributed by David Malcolm <dmalcolm@redhat.com>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "options.h"
#include "diagnostic-path.h"
#include "diagnostic-metadata.h"
#include "function.h"
#include "analyzer/analyzer.h"
#include "diagnostic-event-id.h"
#include "analyzer/analyzer-logging.h"
#include "analyzer/sm.h"
#include "analyzer/pending-diagnostic.h"
#if ENABLE_ANALYZER
namespace ana {
namespace {
/* An experimental state machine, for tracking "taint": unsanitized uses
of data potentially under an attacker's control. */
class taint_state_machine : public state_machine
{
public:
taint_state_machine (logger *logger);
bool inherited_state_p () const FINAL OVERRIDE { return true; }
bool on_stmt (sm_context *sm_ctxt,
const supernode *node,
const gimple *stmt) const FINAL OVERRIDE;
void on_condition (sm_context *sm_ctxt,
const supernode *node,
const gimple *stmt,
tree lhs,
enum tree_code op,
tree rhs) const FINAL OVERRIDE;
bool can_purge_p (state_t s) const FINAL OVERRIDE;
/* Start state. */
state_t m_start;
/* State for a "tainted" value: unsanitized data potentially under an
attacker's control. */
state_t m_tainted;
/* State for a "tainted" value that has a lower bound. */
state_t m_has_lb;
/* State for a "tainted" value that has an upper bound. */
state_t m_has_ub;
/* Stop state, for a value we don't want to track any more. */
state_t m_stop;
};
enum bounds
{
BOUNDS_NONE,
BOUNDS_UPPER,
BOUNDS_LOWER
};
class tainted_array_index
: public pending_diagnostic_subclass<tainted_array_index>
{
public:
tainted_array_index (const taint_state_machine &sm, tree arg,
enum bounds has_bounds)
: m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {}
const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
bool operator== (const tainted_array_index &other) const
{
return same_tree_p (m_arg, other.m_arg);
}
bool emit (rich_location *rich_loc) FINAL OVERRIDE
{
diagnostic_metadata m;
m.add_cwe (129);
switch (m_has_bounds)
{
default:
gcc_unreachable ();
case BOUNDS_NONE:
return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
"use of tainted value %qE in array lookup"
" without bounds checking",
m_arg);
break;
case BOUNDS_UPPER:
return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
"use of tainted value %qE in array lookup"
" without lower-bounds checking",
m_arg);
break;
case BOUNDS_LOWER:
return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
"use of tainted value %qE in array lookup"
" without upper-bounds checking",
m_arg);
break;
}
}
label_text describe_state_change (const evdesc::state_change &change)
FINAL OVERRIDE
{
if (change.m_new_state == m_sm.m_tainted)
{
if (change.m_origin)
return change.formatted_print ("%qE has an unchecked value here"
" (from %qE)",
change.m_expr, change.m_origin);
else
return change.formatted_print ("%qE gets an unchecked value here",
change.m_expr);
}
else if (change.m_new_state == m_sm.m_has_lb)
return change.formatted_print ("%qE has its lower bound checked here",
change.m_expr);
else if (change.m_new_state == m_sm.m_has_ub)
return change.formatted_print ("%qE has its upper bound checked here",
change.m_expr);
return label_text ();
}
label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
{
switch (m_has_bounds)
{
default:
gcc_unreachable ();
case BOUNDS_NONE:
return ev.formatted_print ("use of tainted value %qE in array lookup"
" without bounds checking",
m_arg);
case BOUNDS_UPPER:
return ev.formatted_print ("use of tainted value %qE in array lookup"
" without lower-bounds checking",
m_arg);
case BOUNDS_LOWER:
return ev.formatted_print ("use of tainted value %qE in array lookup"
" without upper-bounds checking",
m_arg);
}
}
private:
const taint_state_machine &m_sm;
tree m_arg;
enum bounds m_has_bounds;
};
/* taint_state_machine's ctor. */
taint_state_machine::taint_state_machine (logger *logger)
: state_machine ("taint", logger)
{
m_start = add_state ("start");
m_tainted = add_state ("tainted");
m_has_lb = add_state ("has_lb");
m_has_ub = add_state ("has_ub");
m_stop = add_state ("stop");
}
/* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
bool
taint_state_machine::on_stmt (sm_context *sm_ctxt,
const supernode *node,
const gimple *stmt) const
{
if (const gcall *call = dyn_cast <const gcall *> (stmt))
if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
{
if (is_named_call_p (callee_fndecl, "fread", call, 4))
{
tree arg = gimple_call_arg (call, 0);
arg = sm_ctxt->get_readable_tree (arg);
sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
/* Dereference an ADDR_EXPR. */
// TODO: should the engine do this?
if (TREE_CODE (arg) == ADDR_EXPR)
sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
m_start, m_tainted);
return true;
}
}
// TODO: ...etc; many other sources of untrusted data
if (const gassign *assign = dyn_cast <const gassign *> (stmt))
{
tree rhs1 = gimple_assign_rhs1 (assign);
enum tree_code op = gimple_assign_rhs_code (assign);
/* Check array accesses. */
if (op == ARRAY_REF)
{
tree arg = TREE_OPERAND (rhs1, 1);
arg = sm_ctxt->get_readable_tree (arg);
/* Unsigned types have an implicit lower bound. */
bool is_unsigned = false;
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg));
/* Complain about missing bounds. */
sm_ctxt->warn_for_state
(node, stmt, arg, m_tainted,
new tainted_array_index (*this, arg,
is_unsigned
? BOUNDS_LOWER : BOUNDS_NONE));
sm_ctxt->on_transition (node, stmt, arg, m_tainted, m_stop);
/* Complain about missing upper bound. */
sm_ctxt->warn_for_state (node, stmt, arg, m_has_lb,
new tainted_array_index (*this, arg,
BOUNDS_LOWER));
sm_ctxt->on_transition (node, stmt, arg, m_has_lb, m_stop);
/* Complain about missing lower bound. */
if (!is_unsigned)
{
sm_ctxt->warn_for_state (node, stmt, arg, m_has_ub,
new tainted_array_index (*this, arg,
BOUNDS_UPPER));
sm_ctxt->on_transition (node, stmt, arg, m_has_ub, m_stop);
}
}
}
return false;
}
/* Implementation of state_machine::on_condition vfunc for taint_state_machine.
Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
and states 'has_ub' and 'has_lb' to 'stop'. */
void
taint_state_machine::on_condition (sm_context *sm_ctxt,
const supernode *node,
const gimple *stmt,
tree lhs,
enum tree_code op,
tree rhs ATTRIBUTE_UNUSED) const
{
if (stmt == NULL)
return;
// TODO: this doesn't use the RHS; should we make it symmetric?
// TODO
switch (op)
{
//case NE_EXPR:
//case EQ_EXPR:
case GE_EXPR:
case GT_EXPR:
{
sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
m_has_lb);
sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
m_stop);
}
break;
case LE_EXPR:
case LT_EXPR:
{
sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
m_has_ub);
sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
m_stop);
}
break;
default:
break;
}
}
bool
taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
{
return true;
}
} // anonymous namespace
/* Internal interface to this file. */
state_machine *
make_taint_state_machine (logger *logger)
{
return new taint_state_machine (logger);
}
} // namespace ana
#endif /* #if ENABLE_ANALYZER */