cprover
taint_analysis.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Taint Analysis
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "taint_analysis.h"
13 
14 #include <iostream>
15 #include <fstream>
16 
17 #include <util/invariant.h>
18 #include <util/json.h>
19 #include <util/prefix.h>
20 #include <util/simplify_expr.h>
21 #include <util/string_constant.h>
22 
24 
26 
27 #include "taint_parser.h"
28 
30 {
31 public:
33  {
34  }
35 
36  bool operator()(
37  const std::string &taint_file_name,
38  const symbol_tablet &,
40  bool show_full,
41  const std::string &json_file_name);
42 
43 protected:
46 
47  void instrument(const namespacet &, goto_functionst &);
49 };
50 
52  const namespacet &ns,
53  goto_functionst &goto_functions)
54 {
55  for(auto &function : goto_functions.function_map)
56  instrument(ns, function.second);
57 }
58 
60  const namespacet &ns,
61  goto_functionst::goto_functiont &goto_function)
62 {
63  for(goto_programt::instructionst::iterator
64  it=goto_function.body.instructions.begin();
65  it!=goto_function.body.instructions.end();
66  it++)
67  {
68  const goto_programt::instructiont &instruction=*it;
69 
70  goto_programt insert_before, insert_after;
71 
72  switch(instruction.type)
73  {
74  case FUNCTION_CALL:
75  {
76  const code_function_callt &function_call=
77  to_code_function_call(instruction.code);
78  const exprt &function=function_call.function();
79 
80  if(function.id()==ID_symbol)
81  {
82  const irep_idt &identifier=
83  to_symbol_expr(function).get_identifier();
84 
85  std::set<irep_idt> identifiers;
86 
87  identifiers.insert(identifier);
88 
89  irep_idt class_id=function.get(ID_C_class);
90  if(class_id.empty())
91  {
92  }
93  else
94  {
95  std::string suffix=
96  std::string(
97  id2string(identifier), class_id.size(), std::string::npos);
98 
99  class_hierarchyt::idst parents=
101  for(const auto &p : parents)
102  identifiers.insert(id2string(p)+suffix);
103  }
104 
105  for(const auto &rule : taint.rules)
106  {
107  bool match=false;
108  for(const auto &i : identifiers)
109  if(i==rule.function_identifier ||
110  has_prefix(
111  id2string(i),
112  "java::"+id2string(rule.function_identifier)+":"))
113  {
114  match=true;
115  break;
116  }
117 
118  if(match)
119  {
120  debug() << "MATCH " << rule.id << " on " << identifier << eom;
121 
122  exprt where=nil_exprt();
123 
124  const code_typet &code_type=to_code_type(function.type());
125 
126  bool have_this=
127  !code_type.parameters().empty() &&
128  code_type.parameters().front().get_bool(ID_C_this);
129 
130  switch(rule.where)
131  {
133  {
134  const symbolt &return_value_symbol=
135  ns.lookup(id2string(identifier)+"#return_value");
136  where=return_value_symbol.symbol_expr();
137  }
138  break;
139 
141  {
142  unsigned nr=
143  have_this?rule.parameter_number:rule.parameter_number-1;
144  if(function_call.arguments().size()>nr)
145  where=function_call.arguments()[nr];
146  }
147  break;
148 
150  if(have_this)
151  {
153  !function_call.arguments().empty(),
154  "`this` implies at least one argument in function call");
155  where=function_call.arguments()[0];
156  }
157  break;
158  }
159 
160  switch(rule.kind)
161  {
163  {
164  codet code_set_may("set_may");
165  code_set_may.operands().resize(2);
166  code_set_may.op0()=where;
167  code_set_may.op1()=
168  address_of_exprt(string_constantt(rule.taint));
169  goto_programt::targett t=insert_after.add_instruction();
170  t->make_other(code_set_may);
171  t->source_location=instruction.source_location;
172  }
173  break;
174 
176  {
177  goto_programt::targett t=insert_before.add_instruction();
178  binary_predicate_exprt get_may("get_may");
179  get_may.op0()=where;
180  get_may.op1()=address_of_exprt(string_constantt(rule.taint));
181  t->make_assertion(not_exprt(get_may));
182  t->source_location=instruction.source_location;
183  t->source_location.set_property_class(
184  "taint rule "+id2string(rule.id));
185  t->source_location.set_comment(rule.message);
186  }
187  break;
188 
190  {
191  codet code_clear_may("clear_may");
192  code_clear_may.operands().resize(2);
193  code_clear_may.op0()=where;
194  code_clear_may.op1()=
195  address_of_exprt(string_constantt(rule.taint));
196  goto_programt::targett t=insert_after.add_instruction();
197  t->make_other(code_clear_may);
198  t->source_location=instruction.source_location;
199  }
200  break;
201  }
202  }
203  }
204  }
205  }
206  break;
207 
208  default:
209  {
210  }
211  }
212 
213  if(!insert_before.empty())
214  {
215  goto_function.body.insert_before_swap(it, insert_before);
216  // advance until we get back to the call
217  while(!it->is_function_call()) ++it;
218  }
219 
220  if(!insert_after.empty())
221  {
222  goto_function.body.destructive_insert(
223  std::next(it), insert_after);
224  }
225  }
226 }
227 
229  const std::string &taint_file_name,
230  const symbol_tablet &symbol_table,
231  goto_functionst &goto_functions,
232  bool show_full,
233  const std::string &json_file_name)
234 {
235  try
236  {
237  json_arrayt json_result;
238  bool use_json=!json_file_name.empty();
239 
240  status() << "Reading taint file `" << taint_file_name
241  << "'" << eom;
242 
243  if(taint_parser(taint_file_name, taint, get_message_handler()))
244  {
245  error() << "Failed to read taint definition file" << eom;
246  return true;
247  }
248 
249  status() << "Got " << taint.rules.size()
250  << " taint definitions" << eom;
251 
252  taint.output(debug());
253  debug() << eom;
254 
255  status() << "Instrumenting taint" << eom;
256 
257  class_hierarchy(symbol_table);
258 
259  const namespacet ns(symbol_table);
260  instrument(ns, goto_functions);
261  goto_functions.update();
262 
263  bool have_entry_point=
264  goto_functions.function_map.find(goto_functionst::entry_point())!=
265  goto_functions.function_map.end();
266 
267  // do we have an entry point?
268  if(have_entry_point)
269  {
270  status() << "Working from entry point" << eom;
271  }
272  else
273  {
274  status() << "No entry point found; "
275  << "we will consider the heads of all functions as reachable"
276  << eom;
277 
278  goto_programt end, gotos, calls;
279 
281 
282  forall_goto_functions(f_it, goto_functions)
283  if(f_it->second.body_available() &&
284  f_it->first!=goto_functionst::entry_point())
285  {
287  const code_function_callt call(ns.lookup(f_it->first).symbol_expr());
288  t->make_function_call(call);
289  calls.add_instruction()->make_goto(end.instructions.begin());
291  g->make_goto(t, side_effect_expr_nondett(bool_typet()));
292  }
293 
295  goto_functions.function_map[goto_functionst::entry_point()];
296 
297  goto_programt &body=entry.body;
298 
299  body.destructive_append(gotos);
300  body.destructive_append(calls);
301  body.destructive_append(end);
302 
303  goto_functions.update();
304  }
305 
306  status() << "Data-flow analysis" << eom;
307 
308  custom_bitvector_analysist custom_bitvector_analysis;
309  custom_bitvector_analysis(goto_functions, ns);
310 
311  if(show_full)
312  {
313  custom_bitvector_analysis.output(ns, goto_functions, std::cout);
314  return false;
315  }
316 
317  forall_goto_functions(f_it, goto_functions)
318  {
319  if(!f_it->second.body.has_assertion())
320  continue;
321 
322  const symbolt &symbol=ns.lookup(f_it->first);
323 
324  if(f_it->first=="__actual_thread_spawn")
325  continue;
326 
327  bool first=true;
328 
329  forall_goto_program_instructions(i_it, f_it->second.body)
330  {
331  if(!i_it->is_assert())
332  continue;
334  continue;
335 
336  if(custom_bitvector_analysis[i_it].has_values.is_false())
337  continue;
338 
339  exprt result=custom_bitvector_analysis.eval(i_it->guard, i_it);
340  exprt result2=simplify_expr(result, ns);
341 
342  if(result2.is_true())
343  continue;
344 
345  if(first)
346  {
347  first=false;
348  if(!use_json)
349  std::cout << "\n"
350  << "******** Function " << symbol.display_name() << '\n';
351  }
352 
353  if(use_json)
354  {
356  json["bugClass"] =
357  json_stringt(i_it->source_location.get_property_class());
358  json["file"] = json_stringt(i_it->source_location.get_file());
359  json["line"]=
360  json_numbert(id2string(i_it->source_location.get_line()));
361  json_result.array.push_back(json);
362  }
363  else
364  {
365  std::cout << i_it->source_location;
366  if(!i_it->source_location.get_comment().empty())
367  std::cout << ": " << i_it->source_location.get_comment();
368 
369  if(!i_it->source_location.get_property_class().empty())
370  std::cout << " ("
371  << i_it->source_location.get_property_class() << ")";
372 
373  std::cout << '\n';
374  }
375  }
376  }
377 
378  if(use_json)
379  {
380  std::ofstream json_out(json_file_name);
381 
382  if(!json_out)
383  {
384  error() << "Failed to open json output `"
385  << json_file_name << "'" << eom;
386  return true;
387  }
388 
389  status() << "Analysis result is written to `"
390  << json_file_name << "'" << eom;
391 
392  json_out << json_result << '\n';
393  }
394 
395  return false;
396  }
397  catch(const char *error_msg)
398  {
399  error() << error_msg << eom;
400  return true;
401  }
402  catch(const std::string &error_msg)
403  {
404  error() << error_msg << eom;
405  return true;
406  }
407  catch(...)
408  {
409  error() << "Caught unexpected error in taint_analysist::operator()" << eom;
410  return true;
411  }
412 }
413 
415  goto_modelt &goto_model,
416  const std::string &taint_file_name,
417  message_handlert &message_handler,
418  bool show_full,
419  const std::string &json_file_name)
420 {
422  taint_analysis.set_message_handler(message_handler);
423  return taint_analysis(
424  taint_file_name,
425  goto_model.symbol_table,
426  goto_model.goto_functions,
427  show_full,
428  json_file_name);
429 }
taint_analysis
bool taint_analysis(goto_modelt &goto_model, const std::string &taint_file_name, message_handlert &message_handler, bool show_full, const std::string &json_file_name)
Definition: taint_analysis.cpp:414
messaget
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:144
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:35
json_numbert
Definition: json.h:235
taint_parse_treet::rules
rulest rules
Definition: taint_parser.h:62
symbol_tablet
The symbol table.
Definition: symbol_table.h:19
goto_programt::instructiont::source_location
source_locationt source_location
The location of the instruction in the source file.
Definition: goto_program.h:187
class_hierarchyt
Non-graph-based representation of the class hierarchy.
Definition: class_hierarchy.h:42
json
json_objectt json(const source_locationt &location)
Definition: json_expr.cpp:87
taint_analysist::class_hierarchy
class_hierarchyt class_hierarchy
Definition: taint_analysis.cpp:45
DATA_INVARIANT
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:485
symbolt::display_name
const irep_idt & display_name() const
Return language specific display name if present.
Definition: symbol.h:55
messaget::status
mstreamt & status() const
Definition: message.h:401
taint_analysist::instrument
void instrument(const namespacet &, goto_functionst &)
Definition: taint_analysis.cpp:51
custom_bitvector_analysis.h
taint_parser.h
taint_parse_treet
Definition: taint_parser.h:22
goto_programt::instructiont::type
goto_program_instruction_typet type
What kind of instruction?
Definition: goto_program.h:190
taint_analysist
Definition: taint_analysis.cpp:29
prefix.h
taint_analysis.h
string_constant.h
goto_programt::empty
bool empty() const
Is the program empty?
Definition: goto_program.h:626
taint_parse_treet::rulet::RETURN_VALUE
@ RETURN_VALUE
Definition: taint_parser.h:29
exprt
Base class for all expressions.
Definition: expr.h:54
goto_modelt
Definition: goto_model.h:24
exprt::op0
exprt & op0()
Definition: expr.h:84
invariant.h
custom_bitvector_analysist::eval
exprt eval(const exprt &src, locationt loc)
Definition: custom_bitvector_analysis.h:159
bool_typet
The Boolean type.
Definition: std_types.h:28
messaget::eom
static eomt eom
Definition: message.h:284
exprt::is_true
bool is_true() const
Return whether the expression is a constant representing true.
Definition: expr.cpp:90
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:27
simplify_expr
exprt simplify_expr(const exprt &src, const namespacet &ns)
Definition: simplify_expr.cpp:2325
string_constantt
Definition: string_constant.h:15
taint_analysist::taint
taint_parse_treet taint
Definition: taint_analysis.cpp:44
json_arrayt
Definition: json.h:146
json_objectt
Definition: json.h:244
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:93
goto_programt::add_instruction
targett add_instruction()
Adds an instruction at the end.
Definition: goto_program.h:541
namespacet::lookup
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:166
code_function_callt
codet representation of a function call statement.
Definition: std_code.h:1036
to_code_type
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:982
messaget::result
mstreamt & result() const
Definition: message.h:396
messaget::error
mstreamt & error() const
Definition: message.h:386
taint_parse_treet::rulet::THIS
@ THIS
Definition: taint_parser.h:29
taint_analysist::operator()
bool operator()(const std::string &taint_file_name, const symbol_tablet &, goto_functionst &, bool show_full, const std::string &json_file_name)
Definition: taint_analysis.cpp:228
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
goto_programt::instructiont::code
codet code
Definition: goto_program.h:181
ai_baset::output
virtual void output(const namespacet &ns, const goto_functionst &goto_functions, std::ostream &out) const
Output the abstract states for a whole program.
Definition: ai.cpp:24
binary_predicate_exprt
A base class for expressions that are predicates, i.e., Boolean-typed, and that take exactly two argu...
Definition: std_expr.h:835
symbol_exprt::get_identifier
const irep_idt & get_identifier() const
Definition: std_expr.h:176
nil_exprt
The NIL expression.
Definition: std_expr.h:4461
symbolt::symbol_expr
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
exprt::op1
exprt & op1()
Definition: expr.h:87
taint_parse_treet::rulet::SANITIZER
@ SANITIZER
Definition: taint_parser.h:28
custom_bitvector_analysist
Definition: custom_bitvector_analysis.h:151
to_symbol_expr
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:251
code_typet
Base type of functions.
Definition: std_types.h:751
message_handlert
Definition: message.h:24
class_hierarchyt::idst
std::vector< irep_idt > idst
Definition: class_hierarchy.h:45
to_code_function_call
const code_function_callt & to_code_function_call(const codet &code)
Definition: std_code.h:1173
taint_parser
bool taint_parser(const std::string &file_name, taint_parse_treet &dest, message_handlert &message_handler)
Definition: taint_parser.cpp:20
dstringt::empty
bool empty() const
Definition: dstring.h:75
taint_parse_treet::rulet::PARAMETER
@ PARAMETER
Definition: taint_parser.h:29
END_FUNCTION
@ END_FUNCTION
Definition: goto_program.h:42
code_typet::parameters
const parameterst & parameters() const
Definition: std_types.h:893
taint_analysist::taint_analysist
taint_analysist()
Definition: taint_analysis.cpp:32
code_function_callt::arguments
argumentst & arguments()
Definition: std_code.h:1109
goto_programt::destructive_append
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
Definition: goto_program.h:524
simplify_expr.h
side_effect_expr_nondett
A side_effect_exprt that returns a non-deterministically chosen value.
Definition: std_code.h:1633
goto_functionst::goto_functiont
::goto_functiont goto_functiont
Definition: goto_functions.h:25
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:420
goto_functionst
A collection of goto functions.
Definition: goto_functions.h:22
custom_bitvector_domaint::has_get_must_or_may
static bool has_get_must_or_may(const exprt &)
Definition: custom_bitvector_analysis.cpp:672
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:174
class_hierarchy.h
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:32
taint_parse_treet::output
void output(std::ostream &) const
Definition: taint_parser.cpp:150
jsont::array
arrayt array
Definition: json.h:129
symbolt
Symbol table entry.
Definition: symbol.h:27
goto_functionst::update
void update()
Definition: goto_functions.h:91
json.h
FUNCTION_CALL
@ FUNCTION_CALL
Definition: goto_program.h:49
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:72
taint_parse_treet::rulet::SOURCE
@ SOURCE
Definition: taint_parser.h:28
forall_goto_functions
#define forall_goto_functions(it, functions)
Definition: goto_functions.h:149
has_prefix
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
exprt::operands
operandst & operands()
Definition: expr.h:78
messaget::debug
mstreamt & debug() const
Definition: message.h:416
goto_functionst::entry_point
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
Definition: goto_functions.h:101
class_hierarchyt::get_parents_trans
idst get_parents_trans(const irep_idt &id) const
Definition: class_hierarchy.h:76
address_of_exprt
Operator to return the address of an object.
Definition: std_expr.h:3255
taint_parse_treet::rulet::SINK
@ SINK
Definition: taint_parser.h:28
goto_programt::instructiont
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:178
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:29
goto_programt::targett
instructionst::iterator targett
Definition: goto_program.h:414
code_function_callt::function
exprt & function()
Definition: std_code.h:1099
forall_goto_program_instructions
#define forall_goto_program_instructions(it, program)
Definition: goto_program.h:804
dstringt::size
size_t size() const
Definition: dstring.h:91
json_stringt
Definition: json.h:214
source_locationt::set_property_class
void set_property_class(const irep_idt &property_class)
Definition: source_location.h:132
codet
Data structure for representing an arbitrary statement in a program.
Definition: std_code.h:34
not_exprt
Boolean negation.
Definition: std_expr.h:3308