12 年之前 · 5d27a51288
--- a/docs/DataFlowSanitizer.rst
+++ b/docs/DataFlowSanitizer.rst
@@ -2,6 +2,11 @@
 
															 DataFlowSanitizer
														
 
															 =================
														
 
															+.. toctree::
														
 
															+   :hidden:
														
 
															+
														
 
															+   DataFlowSanitizerDesign
														
 
															+
														
 
															 .. contents::
														
 
															    :local:
														
@@ -28,6 +33,82 @@ The APIs are defined in the header file ``sanitizer/dfsan_interface.h``.
 
															 For further information about each function, please refer to the header
														
 
															 file.
														
 
															+ABI List
														
 
															+--------
														
 
															+
														
 
															+DataFlowSanitizer uses a list of functions known as an ABI list to decide
														
 
															+whether a call to a specific function should use the operating system's native
														
 
															+ABI or whether it should use a variant of this ABI that also propagates labels
														
 
															+through function parameters and return values.  The ABI list file also controls
														
 
															+how labels are propagated in the former case.  DataFlowSanitizer comes with a
														
 
															+default ABI list which is intended to eventually cover the glibc library on
														
 
															+Linux, but it may become necessary for users to extend the ABI list in cases
														
 
															+where a particular library or function cannot be instrumented (e.g. because
														
 
															+it is implemented in assembly or another language which DataFlowSanitizer does
														
 
															+not support) or a function is called from a library or function which cannot
														
 
															+be instrumented.
														
 
															+
														
 
															+DataFlowSanitizer's ABI list file is a :doc:`SanitizerSpecialCaseList`.
														
 
															+The pass treats every function in the ``uninstrumented`` category in the
														
 
															+ABI list file as conforming to the native ABI.  Unless the ABI list contains
														
 
															+additional categories for those functions, a call to one of those functions
														
 
															+will produce a warning message, as the labelling behavior of the function
														
 
															+is unknown.  The other supported categories are ``discard``, ``functional``
														
 
															+and ``custom``.
														
 
															+
														
 
															+* ``discard`` -- To the extent that this function writes to (user-accessible)
														
 
															+  memory, it also updates labels in shadow memory (this condition is trivially
														
 
															+  satisfied for functions which do not write to user-accessible memory).  Its
														
 
															+  return value is unlabelled.
														
 
															+* ``functional`` -- Like ``discard``, except that the label of its return value
														
 
															+  is the union of the labels of its arguments.
														
 
															+* ``custom`` -- Instead of calling the function, a custom wrapper ``__dfsw_F``
														
 
															+  is called, where ``F`` is the name of the function.  This function may wrap
														
 
															+  the original function or provide its own implementation.  This category is
														
 
															+  generally used for uninstrumentable functions which write to user-accessible
														
 
															+  memory or which have more complex label propagation behavior.  The signature
														
 
															+  of ``__dfsw_F`` is based on that of ``F`` with each argument having a
														
 
															+  label of type ``dfsan_label`` appended to the argument list.  If ``F``
														
 
															+  is of non-void return type, a final argument of type ``dfsan_label *``
														
 
															+  is appended to which the custom function can store the label for the
														
 
															+  return value.  For example:
														
 
															+
														
 
															+.. code-block:: c++
														
 
															+
														
 
															+  void f(int x);
														
 
															+  void __dfsw_f(int x, dfsan_label x_label);
														
 
															+
														
 
															+  void *memcpy(void *dest, const void *src, size_t n);
														
 
															+  void *__dfsw_memcpy(void *dest, const void *src, size_t n,
														
 
															+                      dfsan_label dest_label, dfsan_label src_label,
														
 
															+                      dfsan_label n_label, dfsan_label *ret_label);
														
 
															+
														
 
															+If a function defined in the translation unit being compiled belongs to the
														
 
															+``uninstrumented`` category, it will be compiled so as to conform to the
														
 
															+native ABI.  Its arguments will be assumed to be unlabelled, but it will
														
 
															+propagate labels in shadow memory.
														
 
															+
														
 
															+For example:
														
 
															+
														
 
															+.. code-block:: none
														
 
															+
														
 
															+  # main is called by the C runtime using the native ABI.
														
 
															+  fun:main=uninstrumented
														
 
															+  fun:main=discard
														
 
															+
														
 
															+  # malloc only writes to its internal data structures, not user-accessible memory.
														
 
															+  fun:malloc=uninstrumented
														
 
															+  fun:malloc=discard
														
 
															+
														
 
															+  # tolower is a pure function.
														
 
															+  fun:tolower=uninstrumented
														
 
															+  fun:tolower=functional
														
 
															+
														
 
															+  # memcpy needs to copy the shadow from the source to the destination region.
														
 
															+  # This is done in a custom function.
														
 
															+  fun:memcpy=uninstrumented
														
 
															+  fun:memcpy=custom
														
 
															+
														
 
															 Example
														
 
															 =======
														
--- a/docs/DataFlowSanitizerDesign.rst
+++ b/docs/DataFlowSanitizerDesign.rst
@@ -140,3 +140,68 @@ associated directly with registers.  Loads will result in a union of
 
															 all shadow labels corresponding to bytes loaded (which most of the
														
 
															 time will be short circuited by the initial comparison) and stores will
														
 
															 result in a copy of the label to the shadow of all bytes stored to.
														
 
															+
														
 
															+Propagating labels through arguments
														
 
															+------------------------------------
														
 
															+
														
 
															+In order to propagate labels through function arguments and return values,
														
 
															+DataFlowSanitizer changes the ABI of each function in the translation unit.
														
 
															+There are currently two supported ABIs:
														
 
															+
														
 
															+* Args -- Argument and return value labels are passed through additional
														
 
															+  arguments and by modifying the return type.
														
 
															+
														
 
															+* TLS -- Argument and return value labels are passed through TLS variables
														
 
															+  ``__dfsan_arg_tls`` and ``__dfsan_retval_tls``.
														
 
															+
														
 
															+The main advantage of the TLS ABI is that it is more tolerant of ABI mismatches
														
 
															+(TLS storage is not shared with any other form of storage, whereas extra
														
 
															+arguments may be stored in registers which under the native ABI are not used
														
 
															+for parameter passing and thus could contain arbitrary values).  On the other
														
 
															+hand the args ABI is more efficient and allows ABI mismatches to be more easily
														
 
															+identified by checking for nonzero labels in nominally unlabelled programs.
														
 
															+
														
 
															+Implementing the ABI list
														
 
															+-------------------------
														
 
															+
														
 
															+The `ABI list <DataFlowSanitizer.html#abi-list>`_ provides a list of functions
														
 
															+which conform to the native ABI, each of which is callable from an instrumented
														
 
															+program.  This is implemented by replacing each reference to a native ABI
														
 
															+function with a reference to a function which uses the instrumented ABI.
														
 
															+Such functions are automatically-generated wrappers for the native functions.
														
 
															+For example, given the ABI list example provided in the user manual, the
														
 
															+following wrappers will be generated under the args ABI:
														
 
															+
														
 
															+.. code-block:: llvm
														
 
															+
														
 
															+    define linkonce_odr { i8*, i16 } @"dfsw$malloc"(i64 %0, i16 %1) {
														
 
															+    entry:
														
 
															+      %2 = call i8* @malloc(i64 %0)
														
 
															+      %3 = insertvalue { i8*, i16 } undef, i8* %2, 0
														
 
															+      %4 = insertvalue { i8*, i16 } %3, i16 0, 1
														
 
															+      ret { i8*, i16 } %4
														
 
															+    }
														
 
															+
														
 
															+    define linkonce_odr { i32, i16 } @"dfsw$tolower"(i32 %0, i16 %1) {
														
 
															+    entry:
														
 
															+      %2 = call i32 @tolower(i32 %0)
														
 
															+      %3 = insertvalue { i32, i16 } undef, i32 %2, 0
														
 
															+      %4 = insertvalue { i32, i16 } %3, i16 %1, 1
														
 
															+      ret { i32, i16 } %4
														
 
															+    }
														
 
															+
														
 
															+    define linkonce_odr { i8*, i16 } @"dfsw$memcpy"(i8* %0, i8* %1, i64 %2, i16 %3, i16 %4, i16 %5) {
														
 
															+    entry:
														
 
															+      %labelreturn = alloca i16
														
 
															+      %6 = call i8* @__dfsw_memcpy(i8* %0, i8* %1, i64 %2, i16 %3, i16 %4, i16 %5, i16* %labelreturn)
														
 
															+      %7 = load i16* %labelreturn
														
 
															+      %8 = insertvalue { i8*, i16 } undef, i8* %6, 0
														
 
															+      %9 = insertvalue { i8*, i16 } %8, i16 %7, 1
														
 
															+      ret { i8*, i16 } %9
														
 
															+    }
														
 
															+
														
 
															+As an optimization, direct calls to native ABI functions will call the
														
 
															+native ABI function directly and the pass will compute the appropriate label
														
 
															+internally.  This has the advantage of reducing the number of union operations
														
 
															+required when the return value label is known to be zero (i.e. ``discard``
														
 
															+functions, or ``functional`` functions with known unlabelled arguments).
														
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -21,6 +21,7 @@ Using Clang as a Compiler
 
															    AddressSanitizer
														
 
															    ThreadSanitizer
														
 
															    MemorySanitizer
														
 
															+   DataFlowSanitizer
														
 
															    SanitizerSpecialCaseList
														
 
															    Modules
														
 
															    FAQ
														
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -208,7 +208,10 @@ static void addThreadSanitizerPass(const PassManagerBuilder &Builder,
 
															 static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder,
														
 
															                                      PassManagerBase &PM) {
														
 
															-  PM.add(createDataFlowSanitizerPass());
														
 
															+  const PassManagerBuilderWrapper &BuilderWrapper =
														
 
															+      static_cast<const PassManagerBuilderWrapper&>(Builder);
														
 
															+  const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
														
 
															+  PM.add(createDataFlowSanitizerPass(CGOpts.SanitizerBlacklistFile));
														
 
															 }
														
 
															 void EmitAssemblyHelper::CreatePasses(TargetMachine *TM) {
														
--- a/lib/Driver/SanitizerArgs.cpp
+++ b/lib/Driver/SanitizerArgs.cpp
@@ -307,6 +307,9 @@ bool SanitizerArgs::getDefaultBlacklistForKind(const Driver &D, unsigned Kind,
 
															     BlacklistFile = "msan_blacklist.txt";
														
 
															   else if (Kind & NeedsTsanRt)
														
 
															     BlacklistFile = "tsan_blacklist.txt";
														
 
															+  else if (Kind & NeedsDfsanRt)
														
 
															+    BlacklistFile = "dfsan_abilist.txt";
														
 
															+
														
 
															   if (BlacklistFile) {
														
 
															     SmallString<64> Path(D.ResourceDir);
														
 
															     llvm::sys::path::append(Path, BlacklistFile);