Bladeren bron

Merge branch 'master' of https://github.com/llvm-mirror/clang

HOLZSCHUCH Nicolas 7 jaren geleden
bovenliggende
commit
ec13c3d331
98 gewijzigde bestanden met toevoegingen van 1180 en 901 verwijderingen
  1. 9 0
      docs/ReleaseNotes.rst
  2. 10 0
      docs/UsersManual.rst
  3. 1 1
      examples/clang-interpreter/Manager.h
  4. 2 0
      include/clang/Basic/DiagnosticParseKinds.td
  5. 2 0
      include/clang/Basic/DiagnosticSemaKinds.td
  6. 2 2
      include/clang/Basic/SourceManager.h
  7. 17 0
      include/clang/Basic/arm_neon.td
  8. 3 0
      include/clang/Basic/arm_neon_incl.td
  9. 5 0
      include/clang/Driver/Options.td
  10. 6 0
      include/clang/Frontend/CodeGenOptions.def
  11. 22 1
      include/clang/Frontend/MultiplexConsumer.h
  12. 2 3
      include/clang/Parse/Parser.h
  13. 2 2
      include/clang/Sema/Sema.h
  14. 16 11
      lib/AST/ItaniumMangle.cpp
  15. 17 3
      lib/Basic/FileManager.cpp
  16. 6 0
      lib/Basic/Targets/ARM.cpp
  17. 1 0
      lib/Basic/Targets/ARM.h
  18. 2 2
      lib/Basic/VirtualFileSystem.cpp
  19. 19 101
      lib/CodeGen/CGBuiltin.cpp
  20. 15 0
      lib/CodeGen/CGCall.cpp
  21. 4 2
      lib/CodeGen/CGCleanup.cpp
  22. 2 0
      lib/CodeGen/CGDecl.cpp
  23. 12 6
      lib/CodeGen/CGOpenCLRuntime.cpp
  24. 7 3
      lib/CodeGen/CGOpenCLRuntime.h
  25. 13 1
      lib/CodeGen/CodeGenFunction.h
  26. 1 1
      lib/Driver/ToolChain.cpp
  27. 6 1
      lib/Driver/ToolChains/Clang.cpp
  28. 1 1
      lib/Driver/ToolChains/CommonArgs.cpp
  29. 2 2
      lib/Driver/ToolChains/MSVC.cpp
  30. 1 1
      lib/Driver/ToolChains/PS4CPU.cpp
  31. 21 4
      lib/Format/TokenAnnotator.cpp
  32. 2 0
      lib/Frontend/CompilerInvocation.cpp
  33. 1 1
      lib/Frontend/InitHeaderSearch.cpp
  34. 21 3
      lib/Frontend/ModuleDependencyCollector.cpp
  35. 0 24
      lib/Frontend/MultiplexConsumer.cpp
  36. 1 1
      lib/Frontend/PrecompiledPreamble.cpp
  37. 1 1
      lib/Lex/PPDirectives.cpp
  38. 1 2
      lib/Parse/ParseDecl.cpp
  39. 3 2
      lib/Parse/ParseDeclCXX.cpp
  40. 1 1
      lib/Parse/ParseExpr.cpp
  41. 54 28
      lib/Parse/ParseExprCXX.cpp
  42. 2 4
      lib/Parse/ParseOpenMP.cpp
  43. 1 1
      lib/Parse/ParseStmtAsm.cpp
  44. 1 1
      lib/Parse/Parser.cpp
  45. 17 0
      lib/Sema/SemaChecking.cpp
  46. 1 1
      lib/Sema/SemaDecl.cpp
  47. 5 2
      lib/Sema/SemaExpr.cpp
  48. 7 7
      lib/Sema/SemaExprMember.cpp
  49. 2 2
      lib/Sema/SemaTemplate.cpp
  50. 6 0
      lib/Serialization/ASTReaderDecl.cpp
  51. 39 2
      lib/StaticAnalyzer/Checkers/CStringChecker.cpp
  52. 31 6
      lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp
  53. 4 2
      lib/StaticAnalyzer/Core/ExprEngine.cpp
  54. 44 10
      test/Analysis/autoreleasewritechecker_test.m
  55. 18 0
      test/Analysis/new.cpp
  56. 4 1
      test/CXX/drs/dr1xx.cpp
  57. 2 2
      test/CXX/drs/dr4xx.cpp
  58. 1 1
      test/CXX/temp/temp.res/p3.cpp
  59. 117 0
      test/CodeGen/aarch64-neon-dot-product.c
  60. 76 0
      test/CodeGen/arm-neon-dot-product.c
  61. 8 58
      test/CodeGen/avx2-builtins.c
  62. 39 205
      test/CodeGen/avx512bw-builtins.c
  63. 34 237
      test/CodeGen/avx512vlbw-builtins.c
  64. 14 0
      test/CodeGen/no-junk-ftrunc.c
  65. 8 58
      test/CodeGen/sse2-builtins.c
  66. 6 3
      test/CodeGenCXX/lambda-expressions-inside-auto-functions.cpp
  67. 18 0
      test/CodeGenCXX/lambda-to-function-pointer-conversion.cpp
  68. 20 0
      test/CodeGenObjCXX/arc-forwarded-lambda-call.mm
  69. 61 1
      test/CodeGenObjCXX/arc-special-member-functions.mm
  70. 27 1
      test/CodeGenObjCXX/lambda-expressions.mm
  71. 7 3
      test/CodeGenOpenCL/opencl_types.cl
  72. 40 26
      test/CodeGenOpenCL/pipe_builtin.cl
  73. 10 9
      test/CodeGenOpenCL/pipe_types.cl
  74. 2 0
      test/Driver/cxa-atexit.cpp
  75. 24 0
      test/Driver/fast-math.c
  76. 4 4
      test/Index/pipe-size.cl
  77. 1 1
      test/Misc/diag-format.c
  78. 5 0
      test/Modules/Inputs/class-extension/a-private.h
  79. 7 0
      test/Modules/Inputs/class-extension/a-proto.h
  80. 5 0
      test/Modules/Inputs/class-extension/a.h
  81. 11 0
      test/Modules/Inputs/class-extension/module.modulemap
  82. 9 0
      test/Modules/class-extension-protocol.m
  83. 3 2
      test/Parser/cxx-decl.cpp
  84. 5 0
      test/Preprocessor/arm-target-features.c
  85. 5 0
      test/Sema/builtins-x86.c
  86. 4 4
      test/SemaCXX/cxx1y-variable-templates_in_class.cpp
  87. 46 0
      test/SemaTemplate/template-id-expr.cpp
  88. 18 13
      tools/driver/cc1as_main.cpp
  89. 2 2
      tools/driver/driver.cpp
  90. 1 1
      tools/libclang/CIndex.cpp
  91. 3 3
      tools/libclang/CIndexer.cpp
  92. 2 2
      unittests/ASTMatchers/ASTMatchersInternalTest.cpp
  93. 7 7
      unittests/Basic/FileManagerTest.cpp
  94. 2 2
      unittests/Driver/ToolChainTest.cpp
  95. 12 1
      unittests/Format/FormatTestObjC.cpp
  96. 3 3
      unittests/Tooling/RefactoringTest.cpp
  97. 2 2
      unittests/Tooling/ToolingTest.cpp
  98. 13 0
      utils/TableGen/NeonEmitter.cpp

+ 9 - 0
docs/ReleaseNotes.rst

@@ -83,6 +83,15 @@ Non-comprehensive list of changes in this release
 New Compiler Flags
 ------------------
 
+- :option:`-ffp-cast-overflow-workaround` and
+  :option:`-fno-fp-cast-overflow-workaround`
+  enable (disable) a workaround for code that casts floating-point values to
+  integers and back to floating-point. If the floating-point value is not
+  representable in the intermediate integer type, the code is incorrect
+  according to the language standard. This flag will attempt to generate code
+  as if the result of an overflowing conversion matches the overflowing behavior
+  of a target's native float-to-int conversion instructions.
+
 - ...
 
 Deprecated Compiler Flags

+ 10 - 0
docs/UsersManual.rst

@@ -1255,6 +1255,16 @@ are listed below.
    flushed-to-zero number is preserved in the sign of 0, denormals are
    flushed to positive zero, respectively.
 
+.. option:: -f[no-]fp-cast-overflow-workaround
+
+   Enable a workaround for code that casts floating-point values to 
+   integers and back to floating-point. If the floating-point value 
+   is not representable in the intermediate integer type, the code is
+   incorrect according to the language standard. This flag will attempt 
+   to generate code as if the result of an overflowing conversion matches
+   the overflowing behavior of a target's native float-to-int conversion
+   instructions.
+
 .. option:: -fwhole-program-vtables
 
    Enable whole-program vtable optimizations, such as single-implementation

+ 1 - 1
examples/clang-interpreter/Manager.h

@@ -12,7 +12,7 @@
 
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
 
-#if defined(LLVM_ON_WIN32) && defined(_WIN64)
+#if defined(_WIN32) && defined(_WIN64)
 #define CLANG_INTERPRETER_COFF_FORMAT
 #define CLANG_INTERPRETER_WIN_EXCEPTIONS
 #endif

+ 2 - 0
include/clang/Basic/DiagnosticParseKinds.td

@@ -633,6 +633,8 @@ def err_template_spec_syntax_non_template : Error<
   "<unused>|refers to a variable template|<unused>}1">;
 def err_id_after_template_in_nested_name_spec : Error<
   "expected template name after 'template' keyword in nested name specifier">;
+def err_unexpected_template_in_unqualified_id : Error<
+  "'template' keyword not permitted here">;
 def err_two_right_angle_brackets_need_space : Error<
   "a space is required between consecutive right angle brackets (use '> >')">;
 def err_right_angle_bracket_equal_needs_space : Error<

+ 2 - 0
include/clang/Basic/DiagnosticSemaKinds.td

@@ -8251,6 +8251,8 @@ def err_systemz_invalid_tabort_code : Error<
   "invalid transaction abort code">;
 def err_64_bit_builtin_32_bit_tgt : Error<
   "this builtin is only available on 64-bit targets">;
+def err_32_bit_builtin_64_bit_tgt : Error<
+  "this builtin is only available on 32-bit targets">;
 def err_builtin_x64_aarch64_only : Error<
   "this builtin is only available on x86-64 and aarch64 targets">;
 def err_ppc_builtin_only_on_pwr7 : Error<

+ 2 - 2
include/clang/Basic/SourceManager.h

@@ -389,8 +389,8 @@ namespace SrcMgr {
     static ExpansionInfo createForMacroArg(SourceLocation SpellingLoc,
                                            SourceLocation ExpansionLoc) {
       // We store an intentionally invalid source location for the end of the
-      // expansion range to mark that this is a macro argument ion rather than
-      // a normal one.
+      // expansion range to mark that this is a macro argument location rather
+      // than a normal one.
       return create(SpellingLoc, ExpansionLoc, SourceLocation());
     }
   };

+ 17 - 0
include/clang/Basic/arm_neon.td

@@ -199,6 +199,13 @@ def OP_SCALAR_HALF_SET_LNQ : Op<(bitcast "float16x8_t",
                                           (bitcast "int16_t", $p0),
                                           (bitcast "int16x8_t", $p1), $p2))>;
 
+def OP_DOT_LN
+    : Op<(call "vdot", $p0, $p1,
+          (bitcast $p1, (splat(bitcast "uint32x2_t", $p2), $p3)))>;
+def OP_DOT_LNQ
+    : Op<(call "vdot", $p0, $p1,
+          (bitcast $p1, (splat(bitcast "uint32x4_t", $p2), $p3)))>;
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -1579,3 +1586,13 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarc
   def SCALAR_VDUP_LANEH  : IInst<"vdup_lane", "sdi", "Sh">;
   def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "sji", "Sh">;
 }
+
+// v8.2-A dot product instructions.
+let ArchGuard = "defined(__ARM_FEATURE_DOTPROD)" in {
+  def DOT : SInst<"vdot", "dd88", "iQiUiQUi">;
+  def DOT_LANE : SOpInst<"vdot_lane", "dd87i", "iUiQiQUi", OP_DOT_LN>;
+}
+let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in {
+  // Variants indexing into a 128-bit vector are A64 only.
+  def UDOT_LANEQ : SOpInst<"vdot_laneq", "dd89i", "iUiQiQUi", OP_DOT_LNQ>;
+}

+ 3 - 0
include/clang/Basic/arm_neon_incl.td

@@ -253,6 +253,9 @@ def OP_UNAVAILABLE : Operation {
 // B,C,D: array of default elts, force 'Q' size modifier.
 // p: pointer type
 // c: const pointer type
+// 7: vector of 8-bit elements, ignore 'Q' size modifier
+// 8: vector of 8-bit elements, same width as default type
+// 9: vector of 8-bit elements, force 'Q' size modifier
 
 // Every intrinsic subclasses Inst.
 class Inst <string n, string p, string t, Operation o> {

+ 5 - 0
include/clang/Driver/Options.td

@@ -1029,6 +1029,11 @@ def ffp_contract : Joined<["-"], "ffp-contract=">, Group<f_Group>,
   Flags<[CC1Option]>, HelpText<"Form fused FP ops (e.g. FMAs): fast (everywhere)"
   " | on (according to FP_CONTRACT pragma, default) | off (never fuse)">, Values<"fast,on,off">;
 
+def ffp_cast_overflow_workaround : Flag<["-"],
+  "ffp-cast-overflow-workaround">, Group<f_Group>, Flags<[CC1Option]>;
+def fno_fp_cast_overflow_workaround : Flag<["-"],
+  "fno-fp-cast-overflow-workaround">, Group<f_Group>, Flags<[CC1Option]>;
+
 def ffor_scope : Flag<["-"], "ffor-scope">, Group<f_Group>;
 def fno_for_scope : Flag<["-"], "fno-for-scope">, Group<f_Group>;
 

+ 6 - 0
include/clang/Frontend/CodeGenOptions.def

@@ -136,6 +136,12 @@ CODEGENOPT(NoTrappingMath    , 1, 0) ///< Set when -fno-trapping-math is enabled
 CODEGENOPT(NoNaNsFPMath      , 1, 0) ///< Assume FP arguments, results not NaN.
 CODEGENOPT(FlushDenorm       , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
 CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
+
+/// Disable a float-to-int-to-float cast optimization. This attempts to generate
+/// code as if the result of an overflowing conversion matches the overflowing
+/// behavior of a target's native float-to-int conversion instructions.
+CODEGENOPT(FPCastOverflowWorkaround, 1, 0)
+
 CODEGENOPT(UniformWGSize     , 1, 0) ///< -cl-uniform-work-group-size
 CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
 /// \brief Method of Objective-C dispatch to use.

+ 22 - 1
include/clang/Frontend/MultiplexConsumer.h

@@ -17,13 +17,34 @@
 
 #include "clang/Basic/LLVM.h"
 #include "clang/Sema/SemaConsumer.h"
+#include "clang/Serialization/ASTDeserializationListener.h"
 #include <memory>
 #include <vector>
 
 namespace clang {
 
 class MultiplexASTMutationListener;
-class MultiplexASTDeserializationListener;
+
+// This ASTDeserializationListener forwards its notifications to a set of
+// child listeners.
+class MultiplexASTDeserializationListener : public ASTDeserializationListener {
+public:
+  // Does NOT take ownership of the elements in L.
+  MultiplexASTDeserializationListener(
+      const std::vector<ASTDeserializationListener *> &L);
+  void ReaderInitialized(ASTReader *Reader) override;
+  void IdentifierRead(serialization::IdentID ID, IdentifierInfo *II) override;
+  void MacroRead(serialization::MacroID ID, MacroInfo *MI) override;
+  void TypeRead(serialization::TypeIdx Idx, QualType T) override;
+  void DeclRead(serialization::DeclID ID, const Decl *D) override;
+  void SelectorRead(serialization::SelectorID iD, Selector Sel) override;
+  void MacroDefinitionRead(serialization::PreprocessedEntityID,
+                           MacroDefinitionRecord *MD) override;
+  void ModuleRead(serialization::SubmoduleID ID, Module *Mod) override;
+
+private:
+  std::vector<ASTDeserializationListener *> Listeners;
+};
 
 // Has a list of ASTConsumers and calls each of them. Owns its children.
 class MultiplexConsumer : public SemaConsumer {

+ 2 - 3
include/clang/Parse/Parser.h

@@ -2544,12 +2544,11 @@ private:
   struct UsingDeclarator {
     SourceLocation TypenameLoc;
     CXXScopeSpec SS;
-    SourceLocation TemplateKWLoc;
     UnqualifiedId Name;
     SourceLocation EllipsisLoc;
 
     void clear() {
-      TypenameLoc = TemplateKWLoc = EllipsisLoc = SourceLocation();
+      TypenameLoc = EllipsisLoc = SourceLocation();
       SS.clear();
       Name.clear();
     }
@@ -2744,7 +2743,7 @@ public:
                           bool AllowConstructorName,
                           bool AllowDeductionGuide,
                           ParsedType ObjectType,
-                          SourceLocation& TemplateKWLoc,
+                          SourceLocation *TemplateKWLoc,
                           UnqualifiedId &Result);
 
 private:

+ 2 - 2
include/clang/Sema/Sema.h

@@ -6074,7 +6074,7 @@ public:
   TemplateNameKind isTemplateName(Scope *S,
                                   CXXScopeSpec &SS,
                                   bool hasTemplateKeyword,
-                                  UnqualifiedId &Name,
+                                  const UnqualifiedId &Name,
                                   ParsedType ObjectType,
                                   bool EnteringContext,
                                   TemplateTy &Template,
@@ -6244,7 +6244,7 @@ public:
 
   TemplateNameKind ActOnDependentTemplateName(
       Scope *S, CXXScopeSpec &SS, SourceLocation TemplateKWLoc,
-      UnqualifiedId &Name, ParsedType ObjectType, bool EnteringContext,
+      const UnqualifiedId &Name, ParsedType ObjectType, bool EnteringContext,
       TemplateTy &Template, bool AllowInjectedClassName = false);
 
   DeclResult

+ 16 - 11
lib/AST/ItaniumMangle.cpp

@@ -2329,7 +2329,8 @@ void CXXNameMangler::mangleObjCMethodName(const ObjCMethodDecl *MD) {
   Context.mangleObjCMethodName(MD, Out);
 }
 
-static bool isTypeSubstitutable(Qualifiers Quals, const Type *Ty) {
+static bool isTypeSubstitutable(Qualifiers Quals, const Type *Ty,
+                                ASTContext &Ctx) {
   if (Quals)
     return true;
   if (Ty->isSpecificBuiltinType(BuiltinType::ObjCSel))
@@ -2338,7 +2339,11 @@ static bool isTypeSubstitutable(Qualifiers Quals, const Type *Ty) {
     return true;
   if (Ty->isBuiltinType())
     return false;
-
+  // Through to Clang 6.0, we accidentally treated undeduced auto types as
+  // substitution candidates.
+  if (Ctx.getLangOpts().getClangABICompat() > LangOptions::ClangABI::Ver6 &&
+      isa<AutoType>(Ty))
+    return false;
   return true;
 }
 
@@ -2399,7 +2404,8 @@ void CXXNameMangler::mangleType(QualType T) {
   Qualifiers quals = split.Quals;
   const Type *ty = split.Ty;
 
-  bool isSubstitutable = isTypeSubstitutable(quals, ty);
+  bool isSubstitutable =
+    isTypeSubstitutable(quals, ty, Context.getASTContext());
   if (isSubstitutable && mangleSubstitution(T))
     return;
 
@@ -3250,14 +3256,13 @@ void CXXNameMangler::mangleType(const UnaryTransformType *T) {
 }
 
 void CXXNameMangler::mangleType(const AutoType *T) {
-  QualType D = T->getDeducedType();
-  // <builtin-type> ::= Da  # dependent auto
-  if (D.isNull()) {
-    assert(T->getKeyword() != AutoTypeKeyword::GNUAutoType &&
-           "shouldn't need to mangle __auto_type!");
-    Out << (T->isDecltypeAuto() ? "Dc" : "Da");
-  } else
-    mangleType(D);
+  assert(T->getDeducedType().isNull() &&
+         "Deduced AutoType shouldn't be handled here!");
+  assert(T->getKeyword() != AutoTypeKeyword::GNUAutoType &&
+         "shouldn't need to mangle __auto_type!");
+  // <builtin-type> ::= Da # auto
+  //                ::= Dc # decltype(auto)
+  Out << (T->isDecltypeAuto() ? "Dc" : "Da");
 }
 
 void CXXNameMangler::mangleType(const DeducedTemplateSpecializationType *T) {

+ 17 - 3
lib/Basic/FileManager.cpp

@@ -157,7 +157,7 @@ const DirectoryEntry *FileManager::getDirectory(StringRef DirName,
       DirName != llvm::sys::path::root_path(DirName) &&
       llvm::sys::path::is_separator(DirName.back()))
     DirName = DirName.substr(0, DirName.size()-1);
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   // Fixing a problem with "clang C:test.c" on Windows.
   // Stat("C:") does not recognize "C:" as a valid directory
   std::string DirNameStr;
@@ -534,9 +534,23 @@ StringRef FileManager::getCanonicalName(const DirectoryEntry *Dir) {
 
   StringRef CanonicalName(Dir->getName());
 
-  SmallString<256> CanonicalNameBuf;
-  if (!llvm::sys::fs::real_path(Dir->getName(), CanonicalNameBuf))
+#ifdef LLVM_ON_UNIX
+  char CanonicalNameBuf[PATH_MAX];
+  if (realpath(Dir->getName().str().c_str(), CanonicalNameBuf))
     CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage);
+#else
+  SmallString<256> CanonicalNameBuf(CanonicalName);
+  llvm::sys::fs::make_absolute(CanonicalNameBuf);
+  llvm::sys::path::native(CanonicalNameBuf);
+  // We've run into needing to remove '..' here in the wild though, so
+  // remove it.
+  // On Windows, symlinks are significantly less prevalent, so removing
+  // '..' is pretty safe.
+  // Ideally we'd have an equivalent of `realpath` and could implement
+  // sys::fs::canonical across all the platforms.
+  llvm::sys::path::remove_dots(CanonicalNameBuf, /* remove_dot_dot */ true);
+  CanonicalName = StringRef(CanonicalNameBuf).copy(CanonicalNameStorage);
+#endif
 
   CanonicalDirNames.insert(std::make_pair(Dir, CanonicalName));
   return CanonicalName;

+ 6 - 0
lib/Basic/Targets/ARM.cpp

@@ -390,6 +390,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   Unaligned = 1;
   SoftFloat = SoftFloatABI = false;
   HWDiv = 0;
+  DotProd = 0;
 
   // This does not diagnose illegal cases like having both
   // "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp".
@@ -432,6 +433,8 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HW_FP |= HW_FP_HP;
     } else if (Feature == "+fullfp16") {
       HasLegalHalfType = true;
+    } else if (Feature == "+dotprod") {
+      DotProd = true;
     }
   }
   HW_FP &= ~HW_FP_remove;
@@ -731,6 +734,9 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasLegalHalfType)
     Builder.defineMacro("__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", "1");
 
+  // Armv8.2-A dot product intrinsics
+  if (DotProd)
+    Builder.defineMacro("__ARM_FEATURE_DOTPROD", "1");
 
   switch (ArchKind) {
   default:

+ 1 - 0
lib/Basic/Targets/ARM.h

@@ -69,6 +69,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
   unsigned Crypto : 1;
   unsigned DSP : 1;
   unsigned Unaligned : 1;
+  unsigned DotProd : 1;
 
   enum {
     LDREX_B = (1 << 0), /// byte (8-bit)

+ 2 - 2
lib/Basic/VirtualFileSystem.cpp

@@ -967,7 +967,7 @@ class RedirectingFileSystem : public vfs::FileSystem {
   /// "." and "./" in their paths. FIXME: some unittests currently fail on
   /// win32 when using remove_dots and remove_leading_dotslash on paths.
   bool UseCanonicalizedPaths =
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
       false;
 #else
       true;
@@ -1560,7 +1560,7 @@ ErrorOr<Entry *> RedirectingFileSystem::lookupPath(const Twine &Path_) {
 ErrorOr<Entry *>
 RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
                                   sys::path::const_iterator End, Entry *From) {
-#ifndef LLVM_ON_WIN32
+#ifndef _WIN32
   assert(!isTraversalComponent(*Start) &&
          !isTraversalComponent(From->getName()) &&
          "Paths should not contain traversal components");

+ 19 - 101
lib/CodeGen/CGBuiltin.cpp

@@ -3051,11 +3051,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
   case Builtin::BIget_pipe_num_packets:
   case Builtin::BIget_pipe_max_packets: {
-    const char *Name;
+    const char *BaseName;
+    const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
     if (BuiltinID == Builtin::BIget_pipe_num_packets)
-      Name = "__get_pipe_num_packets";
+      BaseName = "__get_pipe_num_packets";
     else
-      Name = "__get_pipe_max_packets";
+      BaseName = "__get_pipe_max_packets";
+    auto Name = std::string(BaseName) +
+                std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
 
     // Building the generic function prototype.
     Value *Arg0 = EmitScalarExpr(E->getArg(0));
@@ -3864,6 +3867,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
   NEONMAP0(vcvtq_u16_v),
   NEONMAP0(vcvtq_u32_v),
   NEONMAP0(vcvtq_u64_v),
+  NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
+  NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
   NEONMAP0(vext_v),
   NEONMAP0(vextq_v),
   NEONMAP0(vfma_v),
@@ -4058,6 +4063,8 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
+  NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
+  NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
   NEONMAP0(vext_v),
   NEONMAP0(vextq_v),
   NEONMAP0(vfma_v),
@@ -4971,6 +4978,14 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
     }
     return SV;
   }
+  case NEON::BI__builtin_neon_vdot_v:
+  case NEON::BI__builtin_neon_vdotq_v: {
+    llvm::Type *InputTy =
+        llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
+    llvm::Type *Tys[2] = { Ty, InputTy };
+    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
+  }
   }
 
   assert(Int && "Expected valid intrinsic number");
@@ -8449,76 +8464,6 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
 }
 
-// Emit addition or subtraction with saturation.
-// Handles both signed and unsigned intrinsics.
-static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, const CallExpr *E,
-                                   SmallVectorImpl<Value *> &Ops,
-                                   bool IsAddition, bool Signed) {
-
-  // Collect vector elements and type data.
-  llvm::Type *ResultType = CGF.ConvertType(E->getType());
-  int NumElements = ResultType->getVectorNumElements();
-  Value *Res;
-  if (!IsAddition && !Signed) {
-    Value *ICmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
-    Value *Select = CGF.Builder.CreateSelect(ICmp, Ops[0], Ops[1]);
-    Res = CGF.Builder.CreateSub(Select, Ops[1]);
-  } else {
-    unsigned EltSizeInBits = ResultType->getScalarSizeInBits();
-    llvm::Type *ExtElementType = EltSizeInBits == 8 ?
-                                 CGF.Builder.getInt16Ty() :
-                                 CGF.Builder.getInt32Ty();
-
-    // Extending vectors to next possible width to make space for possible
-    // overflow.
-    llvm::Type *ExtType = llvm::VectorType::get(ExtElementType, NumElements);
-    Value *VecA = Signed ? CGF.Builder.CreateSExt(Ops[0], ExtType)
-                         : CGF.Builder.CreateZExt(Ops[0], ExtType);
-    Value *VecB = Signed ? CGF.Builder.CreateSExt(Ops[1], ExtType)
-                         : CGF.Builder.CreateZExt(Ops[1], ExtType);
-
-    llvm::Value *ExtProduct = IsAddition ? CGF.Builder.CreateAdd(VecA, VecB)
-                                         : CGF.Builder.CreateSub(VecA, VecB);
-
-    // Create vector of the same type as expected result with max possible
-    // values and extend it to the same type as the product of the addition.
-    APInt SignedMaxValue =
-        llvm::APInt::getSignedMaxValue(EltSizeInBits);
-    Value *Max = Signed ? llvm::ConstantInt::get(ResultType, SignedMaxValue)
-                        : llvm::Constant::getAllOnesValue(ResultType);
-    Value *ExtMaxVec = Signed ? CGF.Builder.CreateSExt(Max, ExtType)
-                              : CGF.Builder.CreateZExt(Max, ExtType);
-    // In Product, replace all overflowed values with max values of non-extended
-    // type.
-    ICmpInst::Predicate Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
-    Value *Cmp = CGF.Builder.CreateICmp(Pred, ExtProduct,
-                                        ExtMaxVec); // 1 if no overflow.
-    Value *SaturatedProduct = CGF.Builder.CreateSelect(
-        Cmp, ExtProduct, ExtMaxVec); // If overflowed, copy from max values.
-
-    if (Signed) {
-      APInt SignedMinValue =
-          llvm::APInt::getSignedMinValue(EltSizeInBits);
-      Value *Min = llvm::ConstantInt::get(ResultType, SignedMinValue);
-      Value *ExtMinVec = CGF.Builder.CreateSExt(Min, ExtType);
-      Value *IsNegative =
-        CGF.Builder.CreateICmp(ICmpInst::ICMP_SLT, SaturatedProduct, ExtMinVec);
-      SaturatedProduct =
-        CGF.Builder.CreateSelect(IsNegative, ExtMinVec, SaturatedProduct);
-    }
-
-    Res = CGF.Builder.CreateTrunc(SaturatedProduct,
-                                  ResultType); // Trunc to ResultType.
-  }
-  if (E->getNumArgs() == 4) { // For masked intrinsics.
-    Value *VecSRC = Ops[2];
-    Value *Mask = Ops[3];
-    return EmitX86Select(CGF, Mask, Res, VecSRC);
-  }
-
-  return Res;
-}
-
 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
@@ -9586,37 +9531,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Load->setVolatile(true);
     return Load;
   }
-  case X86::BI__builtin_ia32_paddusb512_mask:
-  case X86::BI__builtin_ia32_paddusw512_mask:
-  case X86::BI__builtin_ia32_paddusb256:
-  case X86::BI__builtin_ia32_paddusw256:
-  case X86::BI__builtin_ia32_paddusb128:
-  case X86::BI__builtin_ia32_paddusw128:
-    return EmitX86AddSubSatExpr(*this, E, Ops, true, false); // Add, unsigned.
-  case X86::BI__builtin_ia32_paddsb512_mask:
-  case X86::BI__builtin_ia32_paddsw512_mask:
-  case X86::BI__builtin_ia32_paddsb256:
-  case X86::BI__builtin_ia32_paddsw256:
-  case X86::BI__builtin_ia32_paddsb128:
-  case X86::BI__builtin_ia32_paddsw128:
-    return EmitX86AddSubSatExpr(*this, E, Ops, true, true); // Add, signed.
-  case X86::BI__builtin_ia32_psubusb512_mask:
-  case X86::BI__builtin_ia32_psubusw512_mask:
-  case X86::BI__builtin_ia32_psubusb256:
-  case X86::BI__builtin_ia32_psubusw256:
-  case X86::BI__builtin_ia32_psubusb128:
-  case X86::BI__builtin_ia32_psubusw128:
-    return EmitX86AddSubSatExpr(*this, E, Ops, false, false); // Sub, unsigned.
-  case X86::BI__builtin_ia32_psubsb512_mask:
-  case X86::BI__builtin_ia32_psubsw512_mask:
-  case X86::BI__builtin_ia32_psubsb256:
-  case X86::BI__builtin_ia32_psubsw256:
-  case X86::BI__builtin_ia32_psubsb128:
-  case X86::BI__builtin_ia32_psubsw128:
-    return EmitX86AddSubSatExpr(*this, E, Ops, false, true); // Sub, signed.
   }
 }
 
+
 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
   SmallVector<Value*, 4> Ops;

+ 15 - 0
lib/CodeGen/CGCall.cpp

@@ -1727,6 +1727,9 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
     FuncAttrs.addAttribute("no-trapping-math",
                            llvm::toStringRef(CodeGenOpts.NoTrappingMath));
 
+    if (CodeGenOpts.FPCastOverflowWorkaround)
+      FuncAttrs.addAttribute("fp-cast-overflow-workaround", "true");
+
     // TODO: Are these all needed?
     // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
     FuncAttrs.addAttribute("no-infs-fp-math",
@@ -3063,6 +3066,18 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
   } else {
     args.add(convertTempToRValue(local, type, loc), type);
   }
+
+  // Deactivate the cleanup for the callee-destructed param that was pushed.
+  if (hasAggregateEvaluationKind(type) && !CurFuncIsThunk &&
+      getContext().isParamDestroyedInCallee(type) && type.isDestructedType()) {
+    EHScopeStack::stable_iterator cleanup =
+        CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param));
+    assert(cleanup.isValid() &&
+           "cleanup for callee-destructed param not recorded");
+    // This unreachable is a temporary marker which will be removed later.
+    llvm::Instruction *isActive = Builder.CreateUnreachable();
+    args.addArgCleanupDeactivation(cleanup, isActive);
+  }
 }
 
 static bool isProvablyNull(llvm::Value *addr) {

+ 4 - 2
lib/CodeGen/CGCleanup.cpp

@@ -1233,8 +1233,10 @@ void CodeGenFunction::DeactivateCleanupBlock(EHScopeStack::stable_iterator C,
   EHCleanupScope &Scope = cast<EHCleanupScope>(*EHStack.find(C));
   assert(Scope.isActive() && "double deactivation");
 
-  // If it's the top of the stack, just pop it.
-  if (C == EHStack.stable_begin()) {
+  // If it's the top of the stack, just pop it, but do so only if it belongs
+  // to the current RunCleanupsScope.
+  if (C == EHStack.stable_begin() &&
+      CurrentCleanupScopeDepth.strictlyEncloses(C)) {
     // If it's a normal cleanup, we need to pretend that the
     // fallthrough is unreachable.
     CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();

+ 2 - 0
lib/CodeGen/CGDecl.cpp

@@ -1962,6 +1962,8 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
                 DtorKind == QualType::DK_nontrivial_c_struct) &&
                "unexpected destructor type");
         pushDestroy(DtorKind, DeclPtr, Ty);
+        CalleeDestructedParamCleanups[cast<ParmVarDecl>(&D)] =
+            EHStack.stable_begin();
       }
     }
   } else {

+ 12 - 6
lib/CodeGen/CGOpenCLRuntime.cpp

@@ -66,13 +66,19 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
 }
 
 llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) {
-  if (!PipeTy){
-    uint32_t PipeAddrSpc = CGM.getContext().getTargetAddressSpace(
-        CGM.getContext().getOpenCLTypeAddrSpace(T));
-    PipeTy = llvm::PointerType::get(llvm::StructType::create(
-      CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc);
-  }
+  if (T->isReadOnly())
+    return getPipeType(T, "opencl.pipe_ro_t", PipeROTy);
+  else
+    return getPipeType(T, "opencl.pipe_wo_t", PipeWOTy);
+}
 
+llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T, StringRef Name,
+                                         llvm::Type *&PipeTy) {
+  if (!PipeTy)
+    PipeTy = llvm::PointerType::get(llvm::StructType::create(
+      CGM.getLLVMContext(), Name),
+      CGM.getContext().getTargetAddressSpace(
+          CGM.getContext().getOpenCLTypeAddrSpace(T)));
   return PipeTy;
 }
 

+ 7 - 3
lib/CodeGen/CGOpenCLRuntime.h

@@ -35,7 +35,8 @@ class CodeGenModule;
 class CGOpenCLRuntime {
 protected:
   CodeGenModule &CGM;
-  llvm::Type *PipeTy;
+  llvm::Type *PipeROTy;
+  llvm::Type *PipeWOTy;
   llvm::PointerType *SamplerTy;
 
   /// Structure for enqueued block information.
@@ -47,9 +48,12 @@ protected:
   /// Maps block expression to block information.
   llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
 
+  virtual llvm::Type *getPipeType(const PipeType *T, StringRef Name,
+                                  llvm::Type *&PipeTy);
+
 public:
-  CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr),
-    SamplerTy(nullptr) {}
+  CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM),
+    PipeROTy(nullptr), PipeWOTy(nullptr), SamplerTy(nullptr) {}
   virtual ~CGOpenCLRuntime();
 
   /// Emit the IR required for a work-group-local variable declaration, and add

+ 13 - 1
lib/CodeGen/CodeGenFunction.h

@@ -587,7 +587,7 @@ public:
   /// \brief Enters a new scope for capturing cleanups, all of which
   /// will be executed once the scope is exited.
   class RunCleanupsScope {
-    EHScopeStack::stable_iterator CleanupStackDepth;
+    EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth;
     size_t LifetimeExtendedCleanupStackSize;
     bool OldDidCallStackSave;
   protected:
@@ -610,6 +610,8 @@ public:
           CGF.LifetimeExtendedCleanupStack.size();
       OldDidCallStackSave = CGF.DidCallStackSave;
       CGF.DidCallStackSave = false;
+      OldCleanupScopeDepth = CGF.CurrentCleanupScopeDepth;
+      CGF.CurrentCleanupScopeDepth = CleanupStackDepth;
     }
 
     /// \brief Exit this cleanup scope, emitting any accumulated cleanups.
@@ -635,9 +637,14 @@ public:
       CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize,
                            ValuesToReload);
       PerformCleanup = false;
+      CGF.CurrentCleanupScopeDepth = OldCleanupScopeDepth;
     }
   };
 
+  // Cleanup stack depth of the RunCleanupsScope that was pushed most recently.
+  EHScopeStack::stable_iterator CurrentCleanupScopeDepth =
+      EHScopeStack::stable_end();
+
   class LexicalScope : public RunCleanupsScope {
     SourceRange Range;
     SmallVector<const LabelDecl*, 4> Labels;
@@ -1095,6 +1102,11 @@ private:
   /// decls.
   DeclMapTy LocalDeclMap;
 
+  // Keep track of the cleanups for callee-destructed parameters pushed to the
+  // cleanup stack so that they can be deactivated later.
+  llvm::DenseMap<const ParmVarDecl *, EHScopeStack::stable_iterator>
+      CalleeDestructedParamCleanups;
+
   /// SizeArguments - If a ParmVarDecl had the pass_object_size attribute, this
   /// will contain a mapping from said ParmVarDecl to its implicit "object_size"
   /// parameter.

+ 1 - 1
lib/Driver/ToolChain.cpp

@@ -166,7 +166,7 @@ static const DriverSuffix *FindDriverSuffix(StringRef ProgName, size_t &Pos) {
 /// present and lower-casing the string on Windows.
 static std::string normalizeProgramName(llvm::StringRef Argv0) {
   std::string ProgName = llvm::sys::path::stem(Argv0);
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   // Transform to lowercase for case insensitive file systems.
   std::transform(ProgName.begin(), ProgName.end(), ProgName.begin(), ::tolower);
 #endif

+ 6 - 1
lib/Driver/ToolChains/Clang.cpp

@@ -2241,6 +2241,11 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
     CmdArgs.push_back("-mfpmath");
     CmdArgs.push_back(A->getValue());
   }
+
+  // Disable a codegen optimization for floating-point casts.
+  if (Args.hasFlag(options::OPT_ffp_cast_overflow_workaround,
+                   options::OPT_fno_fp_cast_overflow_workaround, false))
+    CmdArgs.push_back("-ffp-cast-overflow-workaround");
 }
 
 static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs,
@@ -4194,7 +4199,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (Args.hasFlag(options::OPT_fregister_global_dtors_with_atexit,
                    options::OPT_fno_register_global_dtors_with_atexit,
-                   RawTriple.isOSDarwin()))
+                   RawTriple.isOSDarwin() && !KernelOrKext))
     CmdArgs.push_back("-fregister-global-dtors-with-atexit");
 
   // -fms-extensions=0 is default.

+ 1 - 1
lib/Driver/ToolChains/CommonArgs.cpp

@@ -378,7 +378,7 @@ void tools::AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
   // forward.
   CmdArgs.push_back("-plugin");
 
-#if defined(LLVM_ON_WIN32)
+#if defined(_WIN32)
   const char *Suffix = ".dll";
 #elif defined(__APPLE__)
   const char *Suffix = ".dylib";

+ 2 - 2
lib/Driver/ToolChains/MSVC.cpp

@@ -33,7 +33,7 @@
 
 // Include the necessary headers to interface with the Windows registry and
 // environment.
-#if defined(LLVM_ON_WIN32)
+#if defined(_WIN32)
 #define USE_WIN32
 #endif
 
@@ -1242,7 +1242,7 @@ void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
     return;
   }
 
-#if defined(LLVM_ON_WIN32)
+#if defined(_WIN32)
   // As a fallback, select default install paths.
   // FIXME: Don't guess drives and paths like this on Windows.
   const StringRef Paths[] = {

+ 1 - 1
lib/Driver/ToolChains/PS4CPU.cpp

@@ -303,7 +303,7 @@ static void ConstructGoldLinkJob(const Tool &T, Compilation &C,
   }
 
   const char *Exec =
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
       Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld.gold"));
 #else
       Args.MakeArgString(ToolChain.GetProgramPath("orbis-ld"));

+ 21 - 4
lib/Format/TokenAnnotator.cpp

@@ -25,6 +25,21 @@ namespace format {
 
 namespace {
 
+/// \brief Returns \c true if the token can be used as an identifier in
+/// an Objective-C \c @selector, \c false otherwise.
+///
+/// Because getFormattingLangOpts() always lexes source code as
+/// Objective-C++, C++ keywords like \c new and \c delete are
+/// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
+///
+/// For Objective-C and Objective-C++, both identifiers and keywords
+/// are valid inside @selector(...) (or a macro which
+/// invokes @selector(...)). So, we allow treat any identifier or
+/// keyword as a potential Objective-C selector component.
+static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
+  return Tok.Tok.getIdentifierInfo() != nullptr;
+}
+
 /// \brief A parser that gathers additional information about tokens.
 ///
 /// The \c TokenAnnotator tries to match parenthesis and square brakets and
@@ -703,9 +718,10 @@ private:
           Tok->Type = TT_CtorInitializerColon;
         else
           Tok->Type = TT_InheritanceColon;
-      } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
+      } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
                  (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
-                  Tok->Next->startsSequence(tok::identifier, tok::colon))) {
+                  (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
+                   Tok->Next->Next->is(tok::colon)))) {
         // This handles a special macro in ObjC code where selectors including
         // the colon are passed as macro arguments.
         Tok->Type = TT_ObjCMethodExpr;
@@ -1346,7 +1362,7 @@ private:
                                          TT_LeadingJavaAnnotation)) {
         Current.Type = Current.Previous->Type;
       }
-    } else if (Current.isOneOf(tok::identifier, tok::kw_new) &&
+    } else if (canBeObjCSelectorComponent(Current) &&
                // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
                Current.Previous && Current.Previous->is(TT_CastRParen) &&
                Current.Previous->MatchingParen &&
@@ -2650,7 +2666,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
   if (Line.Type == LT_ObjCMethodDecl) {
     if (Left.is(TT_ObjCMethodSpecifier))
       return true;
-    if (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_new))
+    if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right))
       // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
       // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
       // method declaration.
@@ -3128,6 +3144,7 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
       llvm::errs() << Tok->FakeLParens[i] << "/";
     llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
+    llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
     llvm::errs() << " Text='" << Tok->TokenText << "'\n";
     if (!Tok->Next)
       assert(Tok == Line.Last);

+ 2 - 0
lib/Frontend/CompilerInvocation.cpp

@@ -699,6 +699,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
   Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
   Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math);
+  Opts.FPCastOverflowWorkaround = Args.hasArg(OPT_ffp_cast_overflow_workaround);
+
   Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss);
   Opts.NumRegisterParameters = getLastArgIntValue(Args, OPT_mregparm, 0, Diags);
   Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack);

+ 1 - 1
lib/Frontend/InitHeaderSearch.cpp

@@ -105,7 +105,7 @@ public:
 }  // end anonymous namespace.
 
 static bool CanPrefixSysroot(StringRef Path) {
-#if defined(LLVM_ON_WIN32)
+#if defined(_WIN32)
   return !Path.empty() && llvm::sys::path::is_separator(Path[0]);
 #else
   return llvm::sys::path::is_absolute(Path);

+ 21 - 3
lib/Frontend/ModuleDependencyCollector.cpp

@@ -97,6 +97,24 @@ struct ModuleDependencyMMCallbacks : public ModuleMapCallbacks {
 
 }
 
+// TODO: move this to Support/Path.h and check for HAVE_REALPATH?
+static bool real_path(StringRef SrcPath, SmallVectorImpl<char> &RealPath) {
+#ifdef LLVM_ON_UNIX
+  char CanonicalPath[PATH_MAX];
+
+  // TODO: emit a warning in case this fails...?
+  if (!realpath(SrcPath.str().c_str(), CanonicalPath))
+    return false;
+
+  SmallString<256> RPath(CanonicalPath);
+  RealPath.swap(RPath);
+  return true;
+#else
+  // FIXME: Add support for systems without realpath.
+  return false;
+#endif
+}
+
 void ModuleDependencyCollector::attachToASTReader(ASTReader &R) {
   R.addListener(llvm::make_unique<ModuleDependencyListener>(*this));
 }
@@ -111,7 +129,7 @@ void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) {
 static bool isCaseSensitivePath(StringRef Path) {
   SmallString<256> TmpDest = Path, UpperDest, RealDest;
   // Remove component traversals, links, etc.
-  if (llvm::sys::fs::real_path(Path, TmpDest))
+  if (!real_path(Path, TmpDest))
     return true; // Current default value in vfs.yaml
   Path = TmpDest;
 
@@ -121,7 +139,7 @@ static bool isCaseSensitivePath(StringRef Path) {
   // already expects when sensitivity isn't setup.
   for (auto &C : Path)
     UpperDest.push_back(toUppercase(C));
-  if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path.equals(RealDest))
+  if (real_path(UpperDest, RealDest) && Path.equals(RealDest))
     return false;
   return true;
 }
@@ -171,7 +189,7 @@ bool ModuleDependencyCollector::getRealPath(StringRef SrcPath,
   // Computing the real path is expensive, cache the search through the
   // parent path directory.
   if (DirWithSymLink == SymLinkMap.end()) {
-    if (llvm::sys::fs::real_path(Dir, RealPath))
+    if (!real_path(Dir, RealPath))
       return false;
     SymLinkMap[Dir] = RealPath.str();
   } else {

+ 0 - 24
lib/Frontend/MultiplexConsumer.cpp

@@ -16,35 +16,11 @@
 #include "clang/Frontend/MultiplexConsumer.h"
 #include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/DeclGroup.h"
-#include "clang/Serialization/ASTDeserializationListener.h"
 
 using namespace clang;
 
 namespace clang {
 
-// This ASTDeserializationListener forwards its notifications to a set of
-// child listeners.
-class MultiplexASTDeserializationListener
-    : public ASTDeserializationListener {
-public:
-  // Does NOT take ownership of the elements in L.
-  MultiplexASTDeserializationListener(
-      const std::vector<ASTDeserializationListener*>& L);
-  void ReaderInitialized(ASTReader *Reader) override;
-  void IdentifierRead(serialization::IdentID ID,
-                      IdentifierInfo *II) override;
-  void MacroRead(serialization::MacroID ID, MacroInfo *MI) override;
-  void TypeRead(serialization::TypeIdx Idx, QualType T) override;
-  void DeclRead(serialization::DeclID ID, const Decl *D) override;
-  void SelectorRead(serialization::SelectorID iD, Selector Sel) override;
-  void MacroDefinitionRead(serialization::PreprocessedEntityID,
-                           MacroDefinitionRecord *MD) override;
-  void ModuleRead(serialization::SubmoduleID ID, Module *Mod) override;
-
-private:
-  std::vector<ASTDeserializationListener *> Listeners;
-};
-
 MultiplexASTDeserializationListener::MultiplexASTDeserializationListener(
       const std::vector<ASTDeserializationListener*>& L)
     : Listeners(L) {

+ 1 - 1
lib/Frontend/PrecompiledPreamble.cpp

@@ -40,7 +40,7 @@ namespace {
 StringRef getInMemoryPreamblePath() {
 #if defined(LLVM_ON_UNIX)
   return "/__clang_tmp/___clang_inmemory_preamble___";
-#elif defined(LLVM_ON_WIN32)
+#elif defined(_WIN32)
   return "C:\\__clang_tmp\\___clang_inmemory_preamble___";
 #else
 #warning "Unknown platform. Defaulting to UNIX-style paths for in-memory PCHs"

+ 1 - 1
lib/Lex/PPDirectives.cpp

@@ -1806,7 +1806,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
   SmallString<128> NormalizedPath;
   if (LangOpts.MSVCCompat) {
     NormalizedPath = Filename.str();
-#ifndef LLVM_ON_WIN32
+#ifndef _WIN32
     llvm::sys::path::native(NormalizedPath);
 #endif
   }

+ 1 - 2
lib/Parse/ParseDecl.cpp

@@ -5600,12 +5600,11 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
            D.getContext() == DeclaratorContext::MemberContext);
       }
 
-      SourceLocation TemplateKWLoc;
       bool HadScope = D.getCXXScopeSpec().isValid();
       if (ParseUnqualifiedId(D.getCXXScopeSpec(),
                              /*EnteringContext=*/true,
                              /*AllowDestructorName=*/true, AllowConstructorName,
-                             AllowDeductionGuide, nullptr, TemplateKWLoc,
+                             AllowDeductionGuide, nullptr, nullptr,
                              D.getName()) ||
           // Once we're past the identifier, if the scope was bad, mark the
           // whole declarator bad.

+ 3 - 2
lib/Parse/ParseDeclCXX.cpp

@@ -602,7 +602,7 @@ bool Parser::ParseUsingDeclarator(DeclaratorContext Context,
             /*AllowConstructorName=*/!(Tok.is(tok::identifier) &&
                                        NextToken().is(tok::equal)),
             /*AllowDeductionGuide=*/false,
-            nullptr, D.TemplateKWLoc, D.Name))
+            nullptr, nullptr, D.Name))
       return true;
   }
 
@@ -2476,7 +2476,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
       SourceLocation TemplateKWLoc;
       UnqualifiedId Name;
       if (ParseUnqualifiedId(SS, false, true, true, false, nullptr,
-                             TemplateKWLoc, Name)) {
+                             &TemplateKWLoc, Name)) {
         SkipUntil(tok::semi);
         return nullptr;
       }
@@ -2488,6 +2488,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
         return nullptr;
       }
 
+      // FIXME: We should do something with the 'template' keyword here.
       return DeclGroupPtrTy::make(DeclGroupRef(Actions.ActOnUsingDeclaration(
           getCurScope(), AS, /*UsingLoc*/ SourceLocation(),
           /*TypenameLoc*/ SourceLocation(), SS, Name,

+ 1 - 1
lib/Parse/ParseExpr.cpp

@@ -1755,7 +1755,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
                                     /*AllowConstructorName=*/
                                       getLangOpts().MicrosoftExt, 
                                     /*AllowDeductionGuide=*/false,
-                                    ObjectType, TemplateKWLoc, Name)) {
+                                    ObjectType, &TemplateKWLoc, Name)) {
         (void)Actions.CorrectDelayedTyposInExpr(LHS);
         LHS = ExprError();
       }

+ 54 - 28
lib/Parse/ParseExprCXX.cpp

@@ -553,7 +553,7 @@ ExprResult Parser::tryParseCXXIdExpression(CXXScopeSpec &SS, bool isAddressOfOpe
                          /*AllowDestructorName=*/false,
                          /*AllowConstructorName=*/false,
                          /*AllowDeductionGuide=*/false,
-                         /*ObjectType=*/nullptr, TemplateKWLoc, Name))
+                         /*ObjectType=*/nullptr, &TemplateKWLoc, Name))
     return ExprError();
 
   // This is only the direct operand of an & operator if it is not
@@ -2044,9 +2044,8 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS,
                                           ParsedType ObjectType,
                                           UnqualifiedId &Id,
                                           bool AssumeTemplateId) {
-  assert((AssumeTemplateId || Tok.is(tok::less)) &&
-         "Expected '<' to finish parsing a template-id");
-  
+  assert(Tok.is(tok::less) && "Expected '<' to finish parsing a template-id");
+
   TemplateTy Template;
   TemplateNameKind TNK = TNK_Non_template;
   switch (Id.getKind()) {
@@ -2142,10 +2141,10 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS,
   // Parse the enclosed template argument list.
   SourceLocation LAngleLoc, RAngleLoc;
   TemplateArgList TemplateArgs;
-  if (Tok.is(tok::less) && ParseTemplateIdAfterTemplateName(
-                               true, LAngleLoc, TemplateArgs, RAngleLoc))
+  if (ParseTemplateIdAfterTemplateName(true, LAngleLoc, TemplateArgs,
+                                       RAngleLoc))
     return true;
-  
+
   if (Id.getKind() == UnqualifiedIdKind::IK_Identifier ||
       Id.getKind() == UnqualifiedIdKind::IK_OperatorFunctionId ||
       Id.getKind() == UnqualifiedIdKind::IK_LiteralOperatorId) {
@@ -2465,16 +2464,23 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
                                 bool AllowConstructorName,
                                 bool AllowDeductionGuide,
                                 ParsedType ObjectType,
-                                SourceLocation& TemplateKWLoc,
+                                SourceLocation *TemplateKWLoc,
                                 UnqualifiedId &Result) {
+  if (TemplateKWLoc)
+    *TemplateKWLoc = SourceLocation();
 
   // Handle 'A::template B'. This is for template-ids which have not
   // already been annotated by ParseOptionalCXXScopeSpecifier().
   bool TemplateSpecified = false;
-  if (getLangOpts().CPlusPlus && Tok.is(tok::kw_template) &&
-      (ObjectType || SS.isSet())) {
-    TemplateSpecified = true;
-    TemplateKWLoc = ConsumeToken();
+  if (Tok.is(tok::kw_template)) {
+    if (TemplateKWLoc && (ObjectType || SS.isSet())) {
+      TemplateSpecified = true;
+      *TemplateKWLoc = ConsumeToken();
+    } else {
+      SourceLocation TemplateLoc = ConsumeToken();
+      Diag(TemplateLoc, diag::err_unexpected_template_in_unqualified_id)
+        << FixItHint::CreateRemoval(TemplateLoc);
+    }
   }
 
   // unqualified-id:
@@ -2513,11 +2519,18 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
     }
 
     // If the next token is a '<', we may have a template.
-    if (TemplateSpecified || Tok.is(tok::less))
-      return ParseUnqualifiedIdTemplateId(SS, TemplateKWLoc, Id, IdLoc,
-                                          EnteringContext, ObjectType,
-                                          Result, TemplateSpecified);
-    
+    TemplateTy Template;
+    if (Tok.is(tok::less))
+      return ParseUnqualifiedIdTemplateId(
+          SS, TemplateKWLoc ? *TemplateKWLoc : SourceLocation(), Id, IdLoc,
+          EnteringContext, ObjectType, Result, TemplateSpecified);
+    else if (TemplateSpecified &&
+             Actions.ActOnDependentTemplateName(
+                 getCurScope(), SS, *TemplateKWLoc, Result, ObjectType,
+                 EnteringContext, Template,
+                 /*AllowInjectedClassName*/ true) == TNK_Non_template)
+      return true;
+
     return false;
   }
   
@@ -2558,7 +2571,14 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
     // We have already parsed a template-id; consume the annotation token as
     // our unqualified-id.
     Result.setTemplateId(TemplateId);
-    TemplateKWLoc = TemplateId->TemplateKWLoc;
+    SourceLocation TemplateLoc = TemplateId->TemplateKWLoc;
+    if (TemplateLoc.isValid()) {
+      if (TemplateKWLoc && (ObjectType || SS.isSet()))
+        *TemplateKWLoc = TemplateLoc;
+      else
+        Diag(TemplateLoc, diag::err_unexpected_template_in_unqualified_id)
+            << FixItHint::CreateRemoval(TemplateLoc);
+    }
     ConsumeAnnotationToken();
     return false;
   }
@@ -2575,13 +2595,20 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
     // 
     //   template-id:
     //     operator-function-id < template-argument-list[opt] >
+    TemplateTy Template;
     if ((Result.getKind() == UnqualifiedIdKind::IK_OperatorFunctionId ||
          Result.getKind() == UnqualifiedIdKind::IK_LiteralOperatorId) &&
-        (TemplateSpecified || Tok.is(tok::less)))
-      return ParseUnqualifiedIdTemplateId(SS, TemplateKWLoc,
-                                          nullptr, SourceLocation(),
-                                          EnteringContext, ObjectType,
-                                          Result, TemplateSpecified);
+        Tok.is(tok::less))
+      return ParseUnqualifiedIdTemplateId(
+          SS, TemplateKWLoc ? *TemplateKWLoc : SourceLocation(), nullptr,
+          SourceLocation(), EnteringContext, ObjectType, Result,
+          TemplateSpecified);
+    else if (TemplateSpecified &&
+             Actions.ActOnDependentTemplateName(
+                 getCurScope(), SS, *TemplateKWLoc, Result, ObjectType,
+                 EnteringContext, Template,
+                 /*AllowInjectedClassName*/ true) == TNK_Non_template)
+      return true;
 
     return false;
   }
@@ -2649,12 +2676,11 @@ bool Parser::ParseUnqualifiedId(CXXScopeSpec &SS, bool EnteringContext,
     IdentifierInfo *ClassName = Tok.getIdentifierInfo();
     SourceLocation ClassNameLoc = ConsumeToken();
 
-    if (TemplateSpecified || Tok.is(tok::less)) {
+    if (Tok.is(tok::less)) {
       Result.setDestructorName(TildeLoc, nullptr, ClassNameLoc);
-      return ParseUnqualifiedIdTemplateId(SS, TemplateKWLoc,
-                                          ClassName, ClassNameLoc,
-                                          EnteringContext, ObjectType,
-                                          Result, TemplateSpecified);
+      return ParseUnqualifiedIdTemplateId(
+          SS, TemplateKWLoc ? *TemplateKWLoc : SourceLocation(), ClassName,
+          ClassNameLoc, EnteringContext, ObjectType, Result, TemplateSpecified);
     }
 
     // Note that this is a destructor name.

+ 2 - 4
lib/Parse/ParseOpenMP.cpp

@@ -1154,7 +1154,6 @@ bool Parser::ParseOpenMPSimpleVarList(
   // Read tokens while ')' or annot_pragma_openmp_end is not found.
   while (Tok.isNot(tok::r_paren) && Tok.isNot(tok::annot_pragma_openmp_end)) {
     CXXScopeSpec SS;
-    SourceLocation TemplateKWLoc;
     UnqualifiedId Name;
     // Read var name.
     Token PrevTok = Tok;
@@ -1166,7 +1165,7 @@ bool Parser::ParseOpenMPSimpleVarList(
       SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
                 StopBeforeMatch);
     } else if (ParseUnqualifiedId(SS, false, false, false, false, nullptr,
-                                  TemplateKWLoc, Name)) {
+                                  nullptr, Name)) {
       IsCorrect = false;
       SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
                 StopBeforeMatch);
@@ -1648,7 +1647,6 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPClauseKind Kind,
 
 static bool ParseReductionId(Parser &P, CXXScopeSpec &ReductionIdScopeSpec,
                              UnqualifiedId &ReductionId) {
-  SourceLocation TemplateKWLoc;
   if (ReductionIdScopeSpec.isEmpty()) {
     auto OOK = OO_None;
     switch (P.getCurToken().getKind()) {
@@ -1690,7 +1688,7 @@ static bool ParseReductionId(Parser &P, CXXScopeSpec &ReductionIdScopeSpec,
                               /*AllowDestructorName*/ false,
                               /*AllowConstructorName*/ false,
                               /*AllowDeductionGuide*/ false,
-                              nullptr, TemplateKWLoc, ReductionId);
+                              nullptr, nullptr, ReductionId);
 }
 
 /// Parses clauses with list.

+ 1 - 1
lib/Parse/ParseStmtAsm.cpp

@@ -239,7 +239,7 @@ ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks,
                                  /*AllowDestructorName=*/false,
                                  /*AllowConstructorName=*/false,
                                  /*AllowDeductionGuide=*/false,
-                                 /*ObjectType=*/nullptr, TemplateKWLoc, Id);
+                                 /*ObjectType=*/nullptr, &TemplateKWLoc, Id);
     // Perform the lookup.
     Result = Actions.LookupInlineAsmIdentifier(SS, TemplateKWLoc, Id,
                                                IsUnevaluatedContext);

+ 1 - 1
lib/Parse/Parser.cpp

@@ -2002,7 +2002,7 @@ bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) {
   if (ParseUnqualifiedId(
           Result.SS, /*EnteringContext*/false, /*AllowDestructorName*/true,
           /*AllowConstructorName*/true, /*AllowDeductionGuide*/false, nullptr,
-          TemplateKWLoc, Result.Name)) {
+          &TemplateKWLoc, Result.Name)) {
     T.skipToEnd();
     return true;
   }

+ 17 - 0
lib/Sema/SemaChecking.cpp

@@ -2309,6 +2309,17 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID,
     << Arg->getSourceRange();
 }
 
+static bool isX86_32Builtin(unsigned BuiltinID) {
+  // These builtins only work on x86-32 targets.
+  switch (BuiltinID) {
+  case X86::BI__builtin_ia32_readeflags_u32:
+  case X86::BI__builtin_ia32_writeeflags_u32:
+    return true;
+  }
+
+  return false;
+}
+
 bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   if (BuiltinID == X86::BI__builtin_cpu_supports)
     return SemaBuiltinCpuSupports(*this, TheCall);
@@ -2316,6 +2327,12 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   if (BuiltinID == X86::BI__builtin_cpu_is)
     return SemaBuiltinCpuIs(*this, TheCall);
 
+  // Check for 32-bit only builtins on a 64-bit target.
+  const llvm::Triple &TT = Context.getTargetInfo().getTriple();
+  if (TT.getArch() != llvm::Triple::x86 && isX86_32Builtin(BuiltinID))
+    return Diag(TheCall->getCallee()->getLocStart(),
+                diag::err_32_bit_builtin_64_bit_tgt);
+
   // If the intrinsic has rounding or SAE make sure its valid.
   if (CheckX86BuiltinRoundingOrSAE(BuiltinID, TheCall))
     return true;

+ 1 - 1
lib/Sema/SemaDecl.cpp

@@ -6979,7 +6979,7 @@ NamedDecl *Sema::getShadowedDeclaration(const TypedefNameDecl *D,
   // Don't warn if typedef declaration is part of a class
   if (D->getDeclContext()->isRecord())
     return nullptr;
-  
+
   if (!shouldWarnIfShadowedDecl(Diags, R))
     return nullptr;
 

+ 5 - 2
lib/Sema/SemaExpr.cpp

@@ -2080,7 +2080,7 @@ Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS,
                  (Id.getKind() == UnqualifiedIdKind::IK_ImplicitSelfParam)
                      ? LookupObjCImplicitSelfParam
                      : LookupOrdinaryName);
-  if (TemplateArgs) {
+  if (TemplateKWLoc.isValid() || TemplateArgs) {
     // Lookup the template name again to correctly establish the context in
     // which it was found. This is really unfortunate as we already did the
     // lookup to determine that it was a template name in the first place. If
@@ -2089,7 +2089,7 @@ Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS,
     bool MemberOfUnknownSpecialization;
     LookupTemplateName(R, S, SS, QualType(), /*EnteringContext=*/false,
                        MemberOfUnknownSpecialization);
-    
+
     if (MemberOfUnknownSpecialization ||
         (R.getResultKind() == LookupResult::NotFoundInCurrentInstantiation))
       return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo,
@@ -2155,6 +2155,9 @@ Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS,
       if (SS.isValid())
         CCC->setTypoNNS(SS.getScopeRep());
     }
+    // FIXME: DiagnoseEmptyLookup produces bad diagnostics if we're looking for
+    // a template name, but we happen to have always already looked up the name
+    // before we get here if it must be a template name.
     if (DiagnoseEmptyLookup(S, SS, R,
                             CCC ? std::move(CCC) : std::move(DefaultValidator),
                             nullptr, None, &TE)) {

+ 7 - 7
lib/Sema/SemaExprMember.cpp

@@ -759,9 +759,9 @@ Sema::BuildMemberReferenceExpr(Expr *Base, QualType BaseType,
     TypoExpr *TE = nullptr;
     QualType RecordTy = BaseType;
     if (IsArrow) RecordTy = RecordTy->getAs<PointerType>()->getPointeeType();
-    if (LookupMemberExprInRecord(*this, R, nullptr,
-                                 RecordTy->getAs<RecordType>(), OpLoc, IsArrow,
-                                 SS, TemplateArgs != nullptr, TE))
+    if (LookupMemberExprInRecord(
+            *this, R, nullptr, RecordTy->getAs<RecordType>(), OpLoc, IsArrow,
+            SS, TemplateKWLoc.isValid() || TemplateArgs != nullptr, TE))
       return ExprError();
     if (TE)
       return TE;
@@ -769,10 +769,10 @@ Sema::BuildMemberReferenceExpr(Expr *Base, QualType BaseType,
   // Explicit member accesses.
   } else {
     ExprResult BaseResult = Base;
-    ExprResult Result = LookupMemberExpr(
-        *this, R, BaseResult, IsArrow, OpLoc, SS,
-        ExtraArgs ? ExtraArgs->ObjCImpDecl : nullptr,
-        TemplateArgs != nullptr);
+    ExprResult Result =
+        LookupMemberExpr(*this, R, BaseResult, IsArrow, OpLoc, SS,
+                         ExtraArgs ? ExtraArgs->ObjCImpDecl : nullptr,
+                         TemplateKWLoc.isValid() || TemplateArgs != nullptr);
 
     if (BaseResult.isInvalid())
       return ExprError();

+ 2 - 2
lib/Sema/SemaTemplate.cpp

@@ -158,7 +158,7 @@ bool Sema::hasAnyAcceptableTemplateNames(LookupResult &R,
 TemplateNameKind Sema::isTemplateName(Scope *S,
                                       CXXScopeSpec &SS,
                                       bool hasTemplateKeyword,
-                                      UnqualifiedId &Name,
+                                      const UnqualifiedId &Name,
                                       ParsedType ObjectTypePtr,
                                       bool EnteringContext,
                                       TemplateTy &TemplateResult,
@@ -4102,7 +4102,7 @@ Sema::BuildQualifiedTemplateIdExpr(CXXScopeSpec &SS,
 TemplateNameKind Sema::ActOnDependentTemplateName(Scope *S,
                                                   CXXScopeSpec &SS,
                                                   SourceLocation TemplateKWLoc,
-                                                  UnqualifiedId &Name,
+                                                  const UnqualifiedId &Name,
                                                   ParsedType ObjectType,
                                                   bool EnteringContext,
                                                   TemplateTy &Result,

+ 6 - 0
lib/Serialization/ASTReaderDecl.cpp

@@ -1205,6 +1205,12 @@ void ASTDeclReader::VisitObjCCategoryDecl(ObjCCategoryDecl *CD) {
     ProtoLocs.push_back(ReadSourceLocation());
   CD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(),
                       Reader.getContext());
+
+  // Protocols in the class extension belong to the class.
+  if (NumProtoRefs > 0 && CD->ClassInterface && CD->IsClassExtension())
+    CD->ClassInterface->mergeClassExtensionProtocolList(
+        (ObjCProtocolDecl *const *)ProtoRefs.data(), NumProtoRefs,
+        Reader.getContext());
 }
 
 void ASTDeclReader::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) {

+ 39 - 2
lib/StaticAnalyzer/Checkers/CStringChecker.cpp

@@ -97,14 +97,17 @@ public:
   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
   void evalStrcpyCommon(CheckerContext &C,
                         const CallExpr *CE,
                         bool returnEnd,
                         bool isBounded,
-                        bool isAppending) const;
+                        bool isAppending,
+                        bool canOverlap = false) const;
 
   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;
 
   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
@@ -1393,6 +1396,18 @@ void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
                    /* isAppending = */ false);
 }
 
+void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const {
+  if (CE->getNumArgs() < 3)
+    return;
+
+  // char *strlcpy(char *dst, const char *src, size_t n);
+  evalStrcpyCommon(C, CE,
+                   /* returnEnd = */ true,
+                   /* isBounded = */ true,
+                   /* isAppending = */ false,
+                   /* canOverlap = */ true);
+}
+
 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
   if (CE->getNumArgs() < 2)
     return;
@@ -1415,9 +1430,21 @@ void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
                    /* isAppending = */ true);
 }
 
+void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
+  if (CE->getNumArgs() < 3)
+    return;
+
+  //char *strlcat(char *s1, const char *s2, size_t n);
+  evalStrcpyCommon(C, CE,
+                   /* returnEnd = */ false,
+                   /* isBounded = */ true,
+                   /* isAppending = */ true,
+                   /* canOverlap = */ true);
+}
+
 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
                                       bool returnEnd, bool isBounded,
-                                      bool isAppending) const {
+                                      bool isAppending, bool canOverlap) const {
   CurrentFunctionDescription = "string copy function";
   ProgramStateRef state = C.getState();
   const LocationContext *LCtx = C.getLocationContext();
@@ -1455,6 +1482,12 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
   SVal maxLastElementIndex = UnknownVal();
   const char *boundWarning = nullptr;
 
+  if (canOverlap)
+    state = CheckOverlap(C, state, CE->getArg(2), Dst, srcExpr);
+
+  if (!state)
+    return;
+
   // If the function is strncpy, strncat, etc... it is bounded.
   if (isBounded) {
     // Get the max number of characters to copy.
@@ -2091,10 +2124,14 @@ bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
     evalFunction =  &CStringChecker::evalStrncpy;
   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
     evalFunction =  &CStringChecker::evalStpcpy;
+  else if (C.isCLibraryFunction(FDecl, "strlcpy"))
+    evalFunction =  &CStringChecker::evalStrlcpy;
   else if (C.isCLibraryFunction(FDecl, "strcat"))
     evalFunction =  &CStringChecker::evalStrcat;
   else if (C.isCLibraryFunction(FDecl, "strncat"))
     evalFunction =  &CStringChecker::evalStrncat;
+  else if (C.isCLibraryFunction(FDecl, "strlcat"))
+    evalFunction =  &CStringChecker::evalStrlcat;
   else if (C.isCLibraryFunction(FDecl, "strlen"))
     evalFunction =  &CStringChecker::evalstrLength;
   else if (C.isCLibraryFunction(FDecl, "strnlen"))

+ 31 - 6
lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp

@@ -51,18 +51,43 @@ public:
                         BugReporter &BR) const;
 private:
   std::vector<std::string> SelectorsWithAutoreleasingPool = {
+      // Common to NSArray,  NSSet, NSOrderedSet
       "enumerateObjectsUsingBlock:",
-      "enumerateKeysAndObjectsUsingBlock:",
-      "enumerateKeysAndObjectsWithOptions:usingBlock:",
       "enumerateObjectsWithOptions:usingBlock:",
+
+      // Common to NSArray and NSOrderedSet
       "enumerateObjectsAtIndexes:options:usingBlock:",
+      "indexOfObjectAtIndexes:options:passingTest:",
+      "indexesOfObjectsAtIndexes:options:passingTest:",
+      "indexOfObjectPassingTest:",
+      "indexOfObjectWithOptions:passingTest:",
+      "indexesOfObjectsPassingTest:",
+      "indexesOfObjectsWithOptions:passingTest:",
+
+      // NSDictionary
+      "enumerateKeysAndObjectsUsingBlock:",
+      "enumerateKeysAndObjectsWithOptions:usingBlock:",
+      "keysOfEntriesPassingTest:",
+      "keysOfEntriesWithOptions:passingTest:",
+
+      // NSSet
+      "objectsPassingTest:",
+      "objectsWithOptions:passingTest:",
+      "enumerateIndexPathsWithOptions:usingBlock:",
+
+      // NSIndexSet
       "enumerateIndexesWithOptions:usingBlock:",
       "enumerateIndexesUsingBlock:",
       "enumerateIndexesInRange:options:usingBlock:",
       "enumerateRangesUsingBlock:",
       "enumerateRangesWithOptions:usingBlock:",
-      "enumerateRangesInRange:options:usingBlock:"
-      "objectWithOptions:passingTest:",
+      "enumerateRangesInRange:options:usingBlock:",
+      "indexPassingTest:",
+      "indexesPassingTest:",
+      "indexWithOptions:passingTest:",
+      "indexesWithOptions:passingTest:",
+      "indexInRange:options:passingTest:",
+      "indexesInRange:options:passingTest:"
   };
 
   std::vector<std::string> FunctionsWithAutoreleasingPool = {
@@ -95,9 +120,9 @@ static void emitDiagnostics(BoundNodes &Match, const Decl *D, BugReporter &BR,
   assert(SW);
   BR.EmitBasicReport(
       ADC->getDecl(), Checker,
-      /*Name=*/"Writing into auto-releasing variable from a different queue",
+      /*Name=*/"Write to autoreleasing out parameter inside autorelease pool",
       /*Category=*/"Memory",
-      (llvm::Twine("Writing into an auto-releasing out parameter inside ") +
+      (llvm::Twine("Write to autoreleasing out parameter inside ") +
        "autorelease pool that may exit before " + Name + " returns; consider "
        "writing first to a strong local variable declared outside of the block")
           .str(),

+ 4 - 2
lib/StaticAnalyzer/Core/ExprEngine.cpp

@@ -1086,12 +1086,14 @@ void ExprEngine::ProcessDeleteDtor(const CFGDeleteDtor Dtor,
     // This workaround will just run the first destructor (which will still
     // invalidate the entire array).
     CallOpts.IsArrayCtorOrDtor = true;
+    // Yes, it may even be a multi-dimensional array.
+    while (const auto *AT = getContext().getAsArrayType(DTy))
+      DTy = AT->getElementType();
     if (ArgR)
       ArgR = getStoreManager().GetElementZeroRegion(cast<SubRegion>(ArgR), DTy);
   }
 
-  VisitCXXDestructor(DE->getDestroyedType(), ArgR, DE, /*IsBase=*/false,
-                     Pred, Dst, CallOpts);
+  VisitCXXDestructor(DTy, ArgR, DE, /*IsBase=*/false, Pred, Dst, CallOpts);
 }
 
 void ExprEngine::ProcessBaseDtor(const CFGBaseDtor D,

+ 44 - 10
test/Analysis/autoreleasewritechecker_test.m

@@ -4,6 +4,7 @@
 
 
 typedef signed char BOOL;
+#define YES ((BOOL)1)
 @protocol NSObject  - (BOOL)isEqual:(id)object; @end
 @interface NSObject <NSObject> {}
 +(id)alloc;
@@ -14,6 +15,8 @@ typedef signed char BOOL;
 @end
 typedef int NSZone;
 typedef int NSCoder;
+typedef unsigned long NSUInteger;
+
 @protocol NSCopying  - (id)copyWithZone:(NSZone *)zone; @end
 @protocol NSCoding  - (void)encodeWithCoder:(NSCoder *)aCoder; @end
 @interface NSError : NSObject <NSCopying, NSCoding> {}
@@ -23,8 +26,27 @@ typedef int NSCoder;
 typedef int dispatch_semaphore_t;
 typedef void (^block_t)();
 
+typedef enum {
+  NSEnumerationConcurrent = (1UL << 0),
+  NSEnumerationReverse = (1UL << 1)
+} NSEnumerationOptions;
+
 @interface NSArray
-- (void) enumerateObjectsUsingBlock:(block_t)block;
+- (void)enumerateObjectsUsingBlock:(block_t)block;
+@end
+
+@interface NSSet
+- (void)objectsPassingTest:(block_t)block;
+@end
+
+@interface NSDictionary
+- (void)enumerateKeysAndObjectsUsingBlock:(block_t)block;
+@end
+
+@interface NSIndexSet
+- (void)indexesPassingTest:(block_t)block;
+- (NSUInteger)indexWithOptions:(NSEnumerationOptions)opts
+                   passingTest:(BOOL (^)(NSUInteger idx, BOOL *stop))predicate;
 @end
 
 typedef int group_t;
@@ -69,7 +91,7 @@ BOOL writeToErrorWithIterator(NSError ** error, NSArray *a) {
     dispatch_semaphore_t sem = dispatch_semaphore_create(0l);
     dispatch_async(queue, ^{
         if (error) {
-            *error = [NSError errorWithDomain:1]; // expected-warning{{Writing into an auto-releasing out parameter inside autorelease pool that may exit before method returns}}
+            *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before method returns; consider writing first to a strong local variable declared outside of the block}}
         }
         dispatch_semaphore_signal(sem);
     });
@@ -82,7 +104,7 @@ BOOL writeToErrorWithIterator(NSError ** error, NSArray *a) {
     dispatch_semaphore_t sem = dispatch_semaphore_create(0l);
     dispatch_group_async(queue, 0, ^{
         if (error) {
-            *error = [NSError errorWithDomain:1]; // expected-warning{{Writing into an auto-releasing out}}
+            *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before method returns; consider writing first to a strong local variable declared outside of the block}}
         }
         dispatch_semaphore_signal(sem);
     });
@@ -123,14 +145,14 @@ BOOL writeToErrorWithIterator(NSError ** error, NSArray *a) {
     dispatch_semaphore_t sem = dispatch_semaphore_create(0l);
     dispatch_async(queue, ^{
         if (error) {
-            *error = [NSError errorWithDomain:1]; // expected-warning{{Writing into an auto-releasing out}}
+            *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before method returns; consider writing first to a strong local variable declared outside of the block}}
         }
         dispatch_semaphore_signal(sem);
     });
     dispatch_async(queue, ^{
         if (error) {
-            *error = [NSError errorWithDomain:1]; // expected-warning{{Writing into an auto-releasing out}}
-            *error = [NSError errorWithDomain:1]; // expected-warning{{Writing into an auto-releasing out}}
+            *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before method returns; consider writing first to a strong local variable declared outside of the block}}
+            *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before method returns; consider writing first to a strong local variable declared outside of the block}}
         }
         dispatch_semaphore_signal(sem);
     });
@@ -150,7 +172,7 @@ BOOL writeToErrorInBlockFromCFunc(NSError *__autoreleasing* error) {
     dispatch_semaphore_t sem = dispatch_semaphore_create(0l);
     dispatch_async(queue, ^{
         if (error) {
-            *error = [NSError errorWithDomain:1]; // expected-warning{{Writing into an auto-releasing out}}
+            *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before function returns; consider writing first to a strong local variable declared outside of the block}}
         }
         dispatch_semaphore_signal(sem);
     });
@@ -164,9 +186,21 @@ BOOL writeToErrorNoWarning(NSError *__autoreleasing* error) {
   return 0;
 }
 
-BOOL writeToErrorWithIterator(NSError *__autoreleasing* error, NSArray *a) {
-  [a enumerateObjectsUsingBlock:^{
-    *error = [NSError errorWithDomain:1]; // expected-warning{{Writing into an auto-releasing out parameter inside autorelease pool that may exit before function returns; consider writing first to a strong local variable declared outside of the block}}
+BOOL writeToErrorWithIterator(NSError *__autoreleasing* error, NSArray *a, NSSet *s, NSDictionary *d, NSIndexSet *i) { [a enumerateObjectsUsingBlock:^{
+    *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before function returns; consider writing first to a strong local variable declared outside of the block}}
+    }];
+  [d enumerateKeysAndObjectsUsingBlock:^{
+    *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before function returns; consider writing first to a strong local variable declared outside of the block}}
+    }];
+  [s objectsPassingTest:^{
+    *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before function returns; consider writing first to a strong local variable declared outside of the block}}
+    }];
+  [i indexesPassingTest:^{
+    *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before function returns; consider writing first to a strong local variable declared outside of the block}}
+    }];
+  [i indexWithOptions: NSEnumerationReverse passingTest:^(NSUInteger idx, BOOL *stop) {
+    *error = [NSError errorWithDomain:1]; // expected-warning{{Write to autoreleasing out parameter inside autorelease pool that may exit before function returns; consider writing first to a strong local variable declared outside of the block}}
+    return YES;
     }];
   return 0;
 }

+ 18 - 0
test/Analysis/new.cpp

@@ -274,6 +274,24 @@ void test_var_delete() {
   clang_analyzer_eval(true); // expected-warning{{TRUE}}
 }
 
+void test_array_delete() {
+  class C {
+  public:
+    ~C() {}
+  };
+
+  auto c1 = new C[2][3];
+  delete[] c1; // no-crash // no-warning
+
+  C c2[4];
+  // FIXME: Should warn.
+  delete[] &c2; // no-crash
+
+  C c3[7][6];
+  // FIXME: Should warn.
+  delete[] &c3; // no-crash
+}
+
 void testDeleteNull() {
   NoReturnDtor *foo = 0;
   delete foo; // should not call destructor, checked below

+ 4 - 1
test/CXX/drs/dr1xx.cpp

@@ -67,7 +67,10 @@ namespace dr108 { // dr108: yes
 namespace dr109 { // dr109: yes
   struct A { template<typename T> void f(T); };
   template<typename T> struct B : T {
-    using T::template f; // expected-error {{using declaration cannot refer to a template}}
+    using T::template f; // expected-error {{'template' keyword not permitted here}}
+    using T::template f<int>; // expected-error {{'template' keyword not permitted here}} expected-error {{using declaration cannot refer to a template specialization}}
+    // FIXME: We shouldn't suggest using the 'template' keyword in a location where it's not valid.
+    using T::f<int>; // expected-error {{use 'template' keyword}} expected-error {{using declaration cannot refer to a template specialization}}
     void g() { this->f<int>(123); } // expected-error {{use 'template'}}
   };
 }

+ 2 - 2
test/CXX/drs/dr4xx.cpp

@@ -318,8 +318,8 @@ namespace dr420 { // dr420: yes
     q->~id<int>();
     p->id<int>::~id<int>();
     q->id<int>::~id<int>();
-    p->template id<int>::~id<int>(); // expected-error {{expected unqualified-id}}
-    q->template id<int>::~id<int>(); // expected-error {{expected unqualified-id}}
+    p->template id<int>::~id<int>(); // expected-error {{'template' keyword not permitted here}} expected-error {{base type 'int' is not a struct}}
+    q->template id<int>::~id<int>(); // expected-error {{'template' keyword not permitted here}} expected-error {{base type 'int' is not a struct}}
     p->A::template id<int>::~id<int>();
     q->A::template id<int>::~id<int>();
   }

+ 1 - 1
test/CXX/temp/temp.res/p3.cpp

@@ -34,5 +34,5 @@ template<typename T> template<typename U> struct A<T>::B {
   // a type, and then complain about the rest of the tokens, and then complain
   // that we didn't get a function declaration.
   friend A<U>::C<T> f7(); // expected-error {{use 'template' keyword to treat 'C' as a dependent template name}} expected-error 3{{}}
-  friend A<U>::template C<T> f8(); // expected-error 3{{}}
+  friend A<U>::template C<T> f8(); // expected-error 4{{}}
 };

+ 117 - 0
test/CodeGen/aarch64-neon-dot-product.c

@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +dotprod \
+// RUN: -disable-O0-optnone  -emit-llvm -o - %s | opt -S -instcombine | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+// Test AArch64 Armv8.2-A dot product intrinsics
+
+#include <arm_neon.h>
+
+uint32x2_t test_vdot_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_u32(a, b, c);
+}
+
+uint32x4_t test_vdotq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_u32(a, b, c);
+}
+
+int32x2_t test_vdot_s32(int32x2_t a, int8x8_t b, int8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_s32(a, b, c);
+}
+
+int32x4_t test_vdotq_s32(int32x4_t a, int8x16_t b, int8x16_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_s32(a, b, c);
+}
+
+uint32x2_t test_vdot_lane_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_lane_u32(a, b, c, 1);
+}
+
+uint32x4_t test_vdotq_lane_u32(uint32x4_t a, uint8x16_t b, uint8x8_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_lane_u32(a, b, c, 1);
+}
+
+uint32x2_t test_vdot_laneq_u32(uint32x2_t a, uint8x8_t b, uint8x16_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_laneq_u32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_laneq_u32(a, b, c, 1);
+}
+
+uint32x4_t test_vdotq_laneq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_laneq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_laneq_u32(a, b, c, 1);
+}
+
+int32x2_t test_vdot_lane_s32(int32x2_t a, int8x8_t b, int8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_lane_s32(a, b, c, 1);
+}
+
+int32x4_t test_vdotq_lane_s32(int32x4_t a, int8x16_t b, int8x8_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_lane_s32(a, b, c, 1);
+}
+
+int32x2_t test_vdot_laneq_s32(int32x2_t a, int8x8_t b, int8x16_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_laneq_s32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_laneq_s32(a, b, c, 1);
+}
+
+int32x4_t test_vdotq_laneq_s32(int32x4_t a, int8x16_t b, int8x16_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_laneq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_laneq_s32(a, b, c, 1);
+}
+

+ 76 - 0
test/CodeGen/arm-neon-dot-product.c

@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple armv8-linux-gnueabihf -target-cpu cortex-a75 -target-feature +dotprod \
+// RUN: -disable-O0-optnone  -emit-llvm -o - %s | opt -S -instcombine | FileCheck %s
+
+// REQUIRES: arm-registered-target
+
+// Test ARM v8.2-A dot product intrinsics
+
+#include <arm_neon.h>
+
+uint32x2_t test_vdot_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_u32(a, b, c);
+}
+
+uint32x4_t test_vdotq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_u32(a, b, c);
+}
+
+int32x2_t test_vdot_s32(int32x2_t a, int8x8_t b, int8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_s32(a, b, c);
+}
+
+int32x4_t test_vdotq_s32(int32x4_t a, int8x16_t b, int8x16_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_s32(a, b, c);
+}
+
+uint32x2_t test_vdot_lane_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_lane_u32(a, b, c, 1);
+}
+
+uint32x4_t test_vdotq_lane_u32(uint32x4_t a, uint8x16_t b, uint8x8_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_lane_u32(a, b, c, 1);
+}
+
+int32x2_t test_vdot_lane_s32(int32x2_t a, int8x8_t b, int8x8_t c) {
+// CHECK-LABEL: define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
+// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
+// CHECK: ret <2 x i32> [[RESULT]]
+  return vdot_lane_s32(a, b, c, 1);
+}
+
+int32x4_t test_vdotq_lane_s32(int32x4_t a, int8x16_t b, int8x8_t c) {
+// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c)
+// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
+// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
+// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
+// CHECK: ret <4 x i32> [[RESULT]]
+  return vdotq_lane_s32(a, b, c, 1);
+}

+ 8 - 58
test/CodeGen/avx2-builtins.c

@@ -56,53 +56,25 @@ __m256i test_mm256_add_epi64(__m256i a, __m256i b) {
 
 __m256i test_mm256_adds_epi8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epi8
-  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32 x i16> %{{.*}}
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_adds_epi8(a, b);
 }
 
 __m256i test_mm256_adds_epi16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epi16
-  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <16 x i32> %{{.*}}
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_adds_epi16(a, b);
 }
 
 __m256i test_mm256_adds_epu8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epu8
-  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
-  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_adds_epu8(a, b);
 }
 
 __m256i test_mm256_adds_epu16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_adds_epu16
-  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
-  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_adds_epu16(a, b);
 }
 
@@ -1199,47 +1171,25 @@ __m256i test_mm256_sub_epi64(__m256i a, __m256i b) {
 
 __m256i test_mm256_subs_epi8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epi8
-  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sub <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32 x i16> %{{.*}}
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_subs_epi8(a, b);
 }
 
 __m256i test_mm256_subs_epi16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epi16
-  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sub <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <16 x i32> %{{.*}}
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_subs_epi16(a, b);
 }
 
 __m256i test_mm256_subs_epu8(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epu8
-  // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
-  // CHECK: icmp ugt <32 x i8> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> {{.*}}, <32 x i8> {{.*}}, <32 x i8> {{.*}}
-  // CHECK: sub <32 x i8> {{.*}}, {{.*}}
+  // CHECK: call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
   return _mm256_subs_epu8(a, b);
 }
 
 __m256i test_mm256_subs_epu16(__m256i a, __m256i b) {
   // CHECK-LABEL: test_mm256_subs_epu16
-  // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
-  // CHECK: icmp ugt <16 x i16> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i16> {{.*}}, <16 x i16> {{.*}}
-  // CHECK: sub <16 x i16> {{.*}}, {{.*}}
+  // CHECK: call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
   return _mm256_subs_epu16(a, b);
 }
 

+ 39 - 205
test/CodeGen/avx512bw-builtins.c

@@ -594,154 +594,62 @@ __m512i test_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
 }
 __m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_adds_epi8
-  // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <64 x i16> %{{.*}}
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
   return _mm512_adds_epi8(__A,__B); 
 }
 __m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_adds_epi8
-  // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <64 x i16> %{{.*}}
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
- return _mm512_mask_adds_epi8(__W,__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
+  return _mm512_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_adds_epi8
-  // CHECK-NOT: @llvm.x86.avx512.mask.padds.b.512
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <64 x i16> %{{.*}}
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
   return _mm512_maskz_adds_epi8(__U,__A,__B); 
 }
 __m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_adds_epi16
-  // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x i32> %{{.*}}
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
- return _mm512_adds_epi16(__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_adds_epi16(__A,__B); 
 }
 __m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_adds_epi16
-  // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x i32> %{{.*}}
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
   return _mm512_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_adds_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_adds_epi16
-  // CHECK-NOT: @llvm.x86.avx512.mask.padds.w.512
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x i32> %{{.*}}
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
-return _mm512_maskz_adds_epi16(__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_maskz_adds_epi16(__U,__A,__B); 
 }
 __m512i test_mm512_adds_epu8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_adds_epu8
-  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
-  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
   return _mm512_adds_epu8(__A,__B); 
 }
 __m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_adds_epu8
-  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
-  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
   return _mm512_mask_adds_epu8(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_adds_epu8
-  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
-  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <64 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
   return _mm512_maskz_adds_epu8(__U,__A,__B); 
 }
 __m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_adds_epu16
-  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
-  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
   return _mm512_adds_epu16(__A,__B); 
 }
 __m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_adds_epu16
-  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
-  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
   return _mm512_mask_adds_epu16(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_adds_epu16
-  // CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
-  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <32 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
   return _mm512_maskz_adds_epu16(__U,__A,__B); 
 }
 __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
@@ -995,137 +903,63 @@ __m512i test_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
 }
 __m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_subs_epi8
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <64 x i16> %{{.*}}
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
-return _mm512_subs_epi8(__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_subs_epi8(__A,__B); 
 }
 __m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_subs_epi8
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <64 x i16> %{{.*}}
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
-return _mm512_mask_subs_epi8(__W,__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_mask_subs_epi8(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_subs_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_subs_epi8
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.b.512
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sext <64 x i8> %{{.*}} to <64 x i16>
-  // CHECK: sub <64 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <64 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> %{{.*}}, <64 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <64 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <64 x i16> %{{.*}}
-  // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
-return _mm512_maskz_subs_epi8(__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_maskz_subs_epi8(__U,__A,__B); 
 }
 __m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_subs_epi16
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x i32> %{{.*}}
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
-return _mm512_subs_epi16(__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_subs_epi16(__A,__B); 
 }
 __m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_subs_epi16
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x i32> %{{.*}}
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
-return _mm512_mask_subs_epi16(__W,__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_mask_subs_epi16(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_subs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_subs_epi16
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubs.w.512
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sext <32 x i16> %{{.*}} to <32 x i32>
-  // CHECK: sub <32 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> %{{.*}}, <32 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <32 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <32 x i32> %{{.*}}
-  // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
-return _mm512_maskz_subs_epi16(__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_maskz_subs_epi16(__U,__A,__B); 
 }
 __m512i test_mm512_subs_epu8(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_subs_epu8
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
-  // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
-  // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
-  // CHECK: sub <64 x i8> {{.*}}, {{.*}}
-return _mm512_subs_epu8(__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_subs_epu8(__A,__B); 
 }
 __m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_subs_epu8
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
-  // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
-  // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
-  // CHECK: sub <64 x i8> {{.*}}, {{.*}}
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
-return _mm512_mask_subs_epu8(__W,__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_mask_subs_epu8(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_subs_epu8
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
-  // CHECK: icmp ugt <64 x i8> {{.*}}, {{.*}}
-  // CHECK: select <64 x i1> {{.*}}, <64 x i8> {{.*}}, <64 x i8> {{.*}}
-  // CHECK: sub <64 x i8> {{.*}}, {{.*}}
-  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
-return _mm512_maskz_subs_epu8(__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_maskz_subs_epu8(__U,__A,__B); 
 }
 __m512i test_mm512_subs_epu16(__m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_subs_epu16
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
-  // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
-  // CHECK: sub <32 x i16> {{.*}}, {{.*}}
-return _mm512_subs_epu16(__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_subs_epu16(__A,__B); 
 }
 __m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask_subs_epu16
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
-  // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
-  // CHECK: sub <32 x i16> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
-return _mm512_mask_subs_epu16(__W,__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_mask_subs_epu16(__W,__U,__A,__B); 
 }
 __m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
   // CHECK-LABEL: @test_mm512_maskz_subs_epu16
-  // CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
-  // CHECK: icmp ugt <32 x i16> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16> {{.*}}
-  // CHECK: sub <32 x i16> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
-return _mm512_maskz_subs_epu16(__U,__A,__B); 
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_maskz_subs_epu16(__U,__A,__B); 
 }
 __m512i test_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, __m512i __B) {
   // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi16

+ 34 - 237
test/CodeGen/avx512vlbw-builtins.c

@@ -1075,211 +1075,97 @@ __m256i test_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A,
 
 __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi8
-  // CHECK-NOT: @llvm.x86.sse2.padds.b
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16> %{{.*}}
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: @llvm.x86.sse2.padds.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi8
-  // CHECK-NOT: @llvm.x86.sse2.padds.b
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16> %{{.*}}
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: @llvm.x86.sse2.padds.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_adds_epi8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi8
-  // CHECK-NOT: @llvm.x86.avx2.padds.b
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32 x i16> %{{.*}}
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: @llvm.x86.avx2.padds.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_adds_epi8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi8
-  // CHECK-NOT: @llvm.x86.avx2.padds.b
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32 x i16> %{{.*}}
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: @llvm.x86.avx2.padds.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_adds_epi8(__U,__A,__B); 
 }
 __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epi16
-  // CHECK-NOT: @llvm.x86.sse2.padds.w
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8 x i32> %{{.*}}
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: @llvm.x86.sse2.padds.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epi16
-  // CHECK-NOT: @llvm.x86.sse2.padds.w
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8 x i32> %{{.*}}
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: @llvm.x86.sse2.padds.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_adds_epi16(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epi16
-  // CHECK-NOT: @llvm.x86.avx2.padds.w
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <16 x i32> %{{.*}}
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: @llvm.x86.avx2.padds.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_adds_epi16(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epi16
-  // CHECK-NOT: @llvm.x86.avx2.padds.w
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <16 x i32> %{{.*}}
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: @llvm.x86.avx2.padds.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_adds_epi16(__U,__A,__B); 
 }
-__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epu8
-  // CHECK-NOT: @llvm.x86.sse2.paddus.b
-  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <16 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: @llvm.x86.sse2.paddus.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_adds_epu8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epu8
-  // CHECK-NOT: @llvm.x86.sse2.paddus.b
-  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <16 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: @llvm.x86.sse2.paddus.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_adds_epu8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epu8
-  // CHECK-NOT: @llvm.x86.avx2.paddus.b
-  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: @llvm.x86.avx2.paddus.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_adds_epu8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epu8
-  // CHECK-NOT: @llvm.x86.avx2.paddus.b
-  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <32 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: @llvm.x86.avx2.paddus.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_adds_epu8(__U,__A,__B); 
 }
 __m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_adds_epu16
-  // CHECK-NOT: @llvm.x86.sse2.paddus.w
-  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <8 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: @llvm.x86.sse2.paddus.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_adds_epu16(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_adds_epu16
-  // CHECK-NOT: @llvm.x86.sse2.paddus.w
-  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <8 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: @llvm.x86.sse2.paddus.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_adds_epu16(__U,__A,__B); 
 }
 __m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_adds_epu16
-  // CHECK-NOT: @llvm.x86.avx2.paddus.w
-  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: @llvm.x86.avx2.paddus.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_adds_epu16(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_adds_epu16
-  // CHECK-NOT: @llvm.x86.avx2.paddus.w
-  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <16 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: @llvm.x86.avx2.paddus.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_adds_epu16(__U,__A,__B); 
 }
@@ -1633,191 +1519,102 @@ __m256i test_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
 }
 __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_subs_epi8
-  // CHECK-NOT: @llvm.x86.sse2.psubs.b
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sub <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16> %{{.*}}
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: @llvm.x86.sse2.psubs.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_subs_epi8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_subs_epi8
-  // CHECK-NOT: @llvm.x86.sse2.psubs.b
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sub <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16> %{{.*}}
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: @llvm.x86.sse2.psubs.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_subs_epi8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_subs_epi8
-  // CHECK-NOT: @llvm.x86.avx2.psubs.b
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sub <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32 x i16> %{{.*}}
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: @llvm.x86.avx2.psubs.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_subs_epi8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_subs_epi8
-  // CHECK-NOT: @llvm.x86.avx2.psubs.b
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
-  // CHECK: sub <32 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <32 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <32 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <32 x i16> %{{.*}}
-  // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
+  // CHECK: @llvm.x86.avx2.psubs.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_subs_epi8(__U,__A,__B); 
 }
 __m128i test_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_subs_epi16
-  // CHECK-NOT: @llvm.x86.sse2.psubs.w
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sub <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8 x i32> %{{.*}}
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: @llvm.x86.sse2.psubs.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_subs_epi16(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_subs_epi16
-  // CHECK-NOT: @llvm.x86.sse2.psubs.w
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sub <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8 x i32> %{{.*}}
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: @llvm.x86.sse2.psubs.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_subs_epi16(__U,__A,__B); 
 }
 __m256i test_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_subs_epi16
-  // CHECK-NOT: @llvm.x86.avx2.psubs.w
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sub <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <16 x i32> %{{.*}}
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: @llvm.x86.avx2.psubs.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_subs_epi16(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_subs_epi16
-  // CHECK-NOT: @llvm.x86.avx2.psubs.w
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sext <16 x i16> %{{.*}} to <16 x i32>
-  // CHECK: sub <16 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <16 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <16 x i32> %{{.*}}
-  // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
+  // CHECK: @llvm.x86.avx2.psubs.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_subs_epi16(__U,__A,__B); 
 }
 __m128i test_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A,        __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_subs_epu8
-  // CHECK-NOT: @llvm.x86.sse2.psubus.b
-  // CHECK: icmp ugt <16 x i8> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i8> {{.*}}, <16 x i8> {{.*}}
-  // CHECK: sub <16 x i8> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.sse2.psubus.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_mask_subs_epu8(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_subs_epu8
-  // CHECK-NOT: @llvm.x86.sse2.psubus.b
-  // CHECK: icmp ugt <16 x i8> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i8> {{.*}}, <16 x i8> {{.*}}
-  // CHECK: sub <16 x i8> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.sse2.psubus.b
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
   return _mm_maskz_subs_epu8(__U,__A,__B); 
 }
 __m256i test_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A,           __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_subs_epu8
-  // CHECK-NOT: @llvm.x86.avx2.psubus.b
-  // CHECK: icmp ugt <32 x i8> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> {{.*}}, <32 x i8> {{.*}}, <32 x i8> {{.*}}
-  // CHECK: sub <32 x i8> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.avx2.psubus.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_mask_subs_epu8(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_subs_epu8
-  // CHECK-NOT: @llvm.x86.avx2.psubus.b
-  // CHECK: icmp ugt <32 x i8> {{.*}}, {{.*}}
-  // CHECK: select <32 x i1> {{.*}}, <32 x i8> {{.*}}, <32 x i8> {{.*}}
-  // CHECK: sub <32 x i8> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.avx2.psubus.b
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
   return _mm256_maskz_subs_epu8(__U,__A,__B); 
 }
 __m128i test_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A,         __m128i __B) {
   // CHECK-LABEL: @test_mm_mask_subs_epu16
-  // CHECK-NOT: @llvm.x86.sse2.psubus.w
-  // CHECK: icmp ugt <8 x i16> {{.*}}, {{.*}}
-  // CHECK: select <8 x i1> {{.*}}, <8 x i16> {{.*}}, <8 x i16> {{.*}}
-  // CHECK: sub <8 x i16> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.sse2.psubus.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_mask_subs_epu16(__W,__U,__A,__B); 
 }
 __m128i test_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
   // CHECK-LABEL: @test_mm_maskz_subs_epu16
-  // CHECK-NOT: @llvm.x86.sse2.psubus.w
-  // CHECK: icmp ugt <8 x i16> {{.*}}, {{.*}}
-  // CHECK: select <8 x i1> {{.*}}, <8 x i16> {{.*}}, <8 x i16> {{.*}}
-  // CHECK: sub <8 x i16> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.sse2.psubus.w
   // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
   return _mm_maskz_subs_epu16(__U,__A,__B); 
 }
 __m256i test_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,      __m256i __B) {
   // CHECK-LABEL: @test_mm256_mask_subs_epu16
-  // CHECK-NOT: @llvm.x86.avx2.psubus.w
-  // CHECK: icmp ugt <16 x i16> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i16> {{.*}}, <16 x i16> {{.*}}
-  // CHECK: sub <16 x i16> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.avx2.psubus.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_mask_subs_epu16(__W,__U,__A,__B); 
 }
 __m256i test_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
   // CHECK-LABEL: @test_mm256_maskz_subs_epu16
-  // CHECK-NOT: @llvm.x86.avx2.psubus.w
-  // CHECK: icmp ugt <16 x i16> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i16> {{.*}}, <16 x i16> {{.*}}
-  // CHECK: sub <16 x i16> {{.*}}, {{.*}}
+  // CHECK: @llvm.x86.avx2.psubus.w
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_subs_epu16(__U,__A,__B); 
 }
 
+
 __m128i test_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,            __m128i __B) {
   // CHECK-LABEL: @test_mm_mask2_permutex2var_epi16
   // CHECK: @llvm.x86.avx512.mask.vpermi2var.hi.128

+ 14 - 0
test/CodeGen/no-junk-ftrunc.c

@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -S -ffp-cast-overflow-workaround %s -emit-llvm -o - | FileCheck %s
+// CHECK-LABEL: main
+// CHECK: attributes #0 = {{.*}}"fp-cast-overflow-workaround"="true"{{.*}}
+
+// The workaround attribute is not applied by default.
+
+// RUN: %clang_cc1 -S %s -emit-llvm -o - | FileCheck %s --check-prefix=DEFAULT
+// DEFAULT-LABEL: main
+// DEFAULT-NOT: fp-cast-overflow-workaround
+
+int main() {
+  return 0;
+}
+

+ 8 - 58
test/CodeGen/sse2-builtins.c

@@ -47,53 +47,25 @@ __m128d test_mm_add_sd(__m128d A, __m128d B) {
 
 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi8
-  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16> %{{.*}}
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_adds_epi8(A, B);
 }
 
 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi16
-  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <8 x i32> %{{.*}}, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8 x i32> %{{.*}}
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_adds_epi16(A, B);
 }
 
 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epu8
-  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <16 x i16> %{{.*}}, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_adds_epu8(A, B);
 }
 
 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epu16
-  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
-  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp ule <8 x i32> %{{.*}}, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_adds_epu16(A, B);
 }
 
@@ -1444,47 +1416,25 @@ __m128d test_mm_sub_sd(__m128d A, __m128d B) {
 
 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epi8
-  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sext <16 x i8> %{{.*}} to <16 x i16>
-  // CHECK: sub <16 x i16> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <16 x i16> %{{.*}}, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
-  // CHECK: icmp slt <16 x i16> %{{.*}}, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
-  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>, <16 x i16> %{{.*}}
-  // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
+  // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_subs_epi8(A, B);
 }
 
 __m128i test_mm_subs_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epi16
-  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
-  // CHECK: sub <8 x i32> %{{.*}}, %{{.*}}
-  // CHECK: icmp sle <8 x i32> %{{.*}}, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
-  // CHECK: icmp slt <8 x i32> %{{.*}},  <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-  // CHECK: select <8 x i1> %{{.*}}, <8 x i32>  <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>, <8 x i32> %{{.*}}
-  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_subs_epi16(A, B);
 }
 
 __m128i test_mm_subs_epu8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epu8
-  // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  // CHECK: icmp ugt <16 x i8> {{.*}}, {{.*}}
-  // CHECK: select <16 x i1> {{.*}}, <16 x i8> {{.*}}, <16 x i8> {{.*}}
-  // CHECK: sub <16 x i8> {{.*}}, {{.*}}
+  // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_subs_epu8(A, B);
 }
 
 __m128i test_mm_subs_epu16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epu16
-  // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
-  // CHECK: icmp ugt <8 x i16> {{.*}}, {{.*}}
-  // CHECK: select <8 x i1> {{.*}}, <8 x i16> {{.*}}, <8 x i16> {{.*}}
-  // CHECK: sub <8 x i16> {{.*}}, {{.*}}
+  // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_subs_epu16(A, B);
 }
 

+ 6 - 3
test/CodeGenCXX/lambda-expressions-inside-auto-functions.cpp

@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin10.0.0 -fblocks -emit-llvm -o - %s -fexceptions -std=c++1y | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10.0.0 -fblocks -emit-llvm -o - %s -fexceptions -std=c++1y | FileCheck --check-prefix CHECK_ABI_LATEST %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10.0.0 -fblocks -emit-llvm -o - %s -fexceptions -std=c++1y -fclang-abi-compat=6.0 | FileCheck --check-prefix CHECK_ABIV6 %s
 
 // CHECK-LABEL: define void @_ZN19non_inline_function3fooEv
 // CHECK-LABEL: define internal void @"_ZZN19non_inline_function3fooEvENK3$_0clEi"(%class.anon
@@ -52,10 +53,12 @@ struct A {
   
   template<class T> auto foo() { return [](const T&) { return 42; }; }
 };
-//CHECK-LABEL: define linkonce_odr i32 @_ZZN22inline_member_function1AIdE14default_lambdaIdEEDavENKUlRKdE_clES5_(%class.anon
+//CHECK_ABIV6: define linkonce_odr i32 @_ZZN22inline_member_function1AIdE14default_lambdaIdEEDavENKUlRKdE_clES5_(%class.anon
+//CHECK_ABI_LATEST: define linkonce_odr i32 @_ZZN22inline_member_function1AIdE14default_lambdaIdEEDavENKUlRKdE_clES4_(%class.anon
 int run2 = A<double>{}.func()(3.14);
 
-//CHECK-LABEL: define linkonce_odr i32 @_ZZN22inline_member_function1AIcE14default_lambdaIcEEDavENKUlRKcE_clES5_(%class.anon
+//CHECK_ABIV6: define linkonce_odr i32 @_ZZN22inline_member_function1AIcE14default_lambdaIcEEDavENKUlRKcE_clES5_(%class.anon
+//CHECK_ABI_LATEST: define linkonce_odr i32 @_ZZN22inline_member_function1AIcE14default_lambdaIcEEDavENKUlRKcE_clES4_(%class.anon
 int run3 = A<char>{}.func()('a');
 } // end inline_member_function
 

+ 18 - 0
test/CodeGenCXX/lambda-to-function-pointer-conversion.cpp

@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc19.0.0 -std=c++11 -emit-llvm -o - %s | FileCheck %s
+
+// This code used to cause an assertion failure in EmitDelegateCallArg.
+
+// CHECK: define internal void @"?__invoke@<lambda_0>@?0??test@@YAXXZ@CA@UTrivial@@@Z"(
+// CHECK: call void @"??R<lambda_0>@?0??test@@YAXXZ@QEBA@UTrivial@@@Z"(
+
+// CHECK: define internal void @"??R<lambda_0>@?0??test@@YAXXZ@QEBA@UTrivial@@@Z"(
+
+struct Trivial {
+  int x;
+};
+
+void (*fnptr)(Trivial);
+
+void test() {
+  fnptr = [](Trivial a){ (void)a; };
+}

+ 20 - 0
test/CodeGenObjCXX/arc-forwarded-lambda-call.mm

@@ -10,6 +10,17 @@ void test0(id x) {
   // CHECK-NEXT: ret i8* [[T2]]
 }
 
+// Check that the delegating block invoke function doesn't destruct the Weak
+// object that is passed.
+
+// CHECK-LABEL: define internal void @___Z8testWeakv_block_invoke(
+// CHECK: call void @"_ZZ8testWeakvENK3$_2clE4Weak"(
+// CHECK-NEXT: ret void
+
+// CHECK-LABEL: define internal void @"_ZZ8testWeakvENK3$_2clE4Weak"(
+// CHECK: call void @_ZN4WeakD1Ev(
+// CHECK-NEXT: ret void
+
 id test1_rv;
 
 void test1() {
@@ -21,3 +32,12 @@ void test1() {
   // CHECK-NEXT: [[T2:%.*]] = tail call i8* @objc_autoreleaseReturnValue(i8* [[T1]])
   // CHECK-NEXT: ret i8* [[T2]]
 }
+
+struct Weak {
+  __weak id x;
+};
+
+void testWeak() {
+  extern void testWeak_helper(void (^)(Weak));
+  testWeak_helper([](Weak){});
+}

+ 61 - 1
test/CodeGenObjCXX/arc-special-member-functions.mm

@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fobjc-arc -fblocks -triple x86_64-apple-darwin10.0.0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -fobjc-arc -fblocks -triple x86_64-apple-darwin10.0.0 -fobjc-runtime-has-weak -emit-llvm -o - %s | FileCheck %s
 
 struct ObjCMember {
   id member;
@@ -12,6 +12,59 @@ struct ObjCBlockMember {
   int (^bp)(int);
 };
 
+// CHECK: %[[STRUCT_CONTAINSWEAK:.*]] = type { %[[STRUCT_WEAK:.*]] }
+// CHECK: %[[STRUCT_WEAK]] = type { i8* }
+
+// The Weak object that is passed is destructed in this constructor.
+
+// CHECK: define void @_ZN12ContainsWeakC2E4Weak(
+// CHECK: call void @_ZN4WeakC1ERKS_(
+// CHECK: call void @_ZN4WeakD1Ev(
+
+// Check that the Weak object passed to this constructor is not destructed after
+// the delegate constructor is called.
+
+// CHECK: define void @_ZN12ContainsWeakC1E4Weak(
+// CHECK: call void @_ZN12ContainsWeakC2E4Weak(
+// CHECK-NEXT: ret void
+
+struct Weak {
+  Weak(id);
+  __weak id x;
+};
+
+struct ContainsWeak {
+  ContainsWeak(Weak);
+  Weak w;
+};
+
+ContainsWeak::ContainsWeak(Weak a) : w(a) {}
+
+// The Weak object that is passed is destructed in this constructor.
+
+// CHECK: define void @_ZN4BaseC2E4Weak(
+// CHECK: call void @_ZN4WeakD1Ev(
+// CHECK: ret void
+
+// Check that the Weak object passed to this constructor is not destructed after
+// the delegate constructor is called.
+
+// CHECK: define linkonce_odr void @_ZN7DerivedCI14BaseE4Weak(
+// CHECK: call void @_ZN7DerivedCI24BaseE4Weak(
+// CHECK-NEXT: ret void
+
+struct Base {
+  Base(Weak);
+};
+
+Base::Base(Weak a) {}
+
+struct Derived : Base {
+  using Base::Base;
+};
+
+Derived d(Weak(0));
+
 // CHECK-LABEL: define void @_Z42test_ObjCMember_default_construct_destructv(
 void test_ObjCMember_default_construct_destruct() {
   // CHECK: call void @_ZN10ObjCMemberC1Ev
@@ -111,6 +164,13 @@ void test_ObjCBlockMember_copy_assign(ObjCBlockMember m1, ObjCBlockMember m2) {
 // CHECK-NEXT: call void @objc_release(i8* [[T7]])
 // CHECK-NEXT: ret
 
+// Check that the Weak object passed to this constructor is not destructed after
+// the delegate constructor is called.
+
+// CHECK: define linkonce_odr void @_ZN7DerivedCI24BaseE4Weak(
+// CHECK: call void @_ZN4BaseC2E4Weak(
+// CHECK-NEXT: ret void
+
 // Implicitly-generated default constructor for ObjCMember
 // CHECK-LABEL: define linkonce_odr void @_ZN10ObjCMemberC2Ev
 // CHECK-NOT: objc_release

+ 27 - 1
test/CodeGenObjCXX/lambda-expressions.mm

@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin10.0.0 -emit-llvm -o - %s -fexceptions -std=c++11 -fblocks -fobjc-arc | FileCheck -check-prefix=ARC %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10.0.0 -emit-llvm -o - %s -fexceptions -std=c++11 -fblocks -fobjc-arc -fobjc-runtime-has-weak -DWEAK_SUPPORTED | FileCheck -check-prefix=ARC %s
 // RUN: %clang_cc1 -triple x86_64-apple-darwin10.0.0 -emit-llvm -o - %s -fexceptions -std=c++11 -fblocks | FileCheck -check-prefix=MRC %s
 
 typedef int (^fp)();
@@ -138,5 +138,31 @@ namespace BlockInLambda {
 }
 @end
 
+// Check that the delegating invoke function doesn't destruct the Weak object
+// that is passed.
+
+// ARC-LABEL: define internal void @"_ZZN14LambdaDelegate4testEvEN3$_58__invokeENS_4WeakE"(
+// ARC: call void @"_ZZN14LambdaDelegate4testEvENK3$_5clENS_4WeakE"(
+// ARC-NEXT: ret void
+
+// ARC-LABEL: define internal void @"_ZZN14LambdaDelegate4testEvENK3$_5clENS_4WeakE"(
+// ARC: call void @_ZN14LambdaDelegate4WeakD1Ev(
+
+#ifdef WEAK_SUPPORTED
+
+namespace LambdaDelegate {
+
+struct Weak {
+  __weak id x;
+};
+
+void test() {
+  void (*p)(Weak) = [](Weak a) { };
+}
+
+};
+
+#endif
+
 // ARC: attributes [[NUW]] = { noinline nounwind{{.*}} }
 // MRC: attributes [[NUW]] = { noinline nounwind{{.*}} }

+ 7 - 3
test/CodeGenOpenCL/opencl_types.cl

@@ -63,9 +63,13 @@ kernel void foo(image1d_t img) {
   // CHECK-AMDGCN: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
 }
 
-kernel void foo_pipe(read_only pipe int p) {}
-// CHECK-SPIR: @foo_pipe(%opencl.pipe_t addrspace(1)* %p)
-// CHECK_AMDGCN: @foo_pipe(%opencl.pipe_t addrspace(1)* %p)
+kernel void foo_ro_pipe(read_only pipe int p) {}
+// CHECK-SPIR: @foo_ro_pipe(%opencl.pipe_ro_t addrspace(1)* %p)
+// CHECK_AMDGCN: @foo_ro_pipe(%opencl.pipe_ro_t addrspace(1)* %p)
+
+kernel void foo_wo_pipe(write_only pipe int p) {}
+// CHECK-SPIR: @foo_wo_pipe(%opencl.pipe_wo_t addrspace(1)* %p)
+// CHECK_AMDGCN: @foo_wo_pipe(%opencl.pipe_wo_t addrspace(1)* %p)
 
 void __attribute__((overloadable)) bad1(image1d_t b, image2d_t c, image2d_t d) {}
 // CHECK-SPIR-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}

+ 40 - 26
test/CodeGenOpenCL/pipe_builtin.cl

@@ -1,68 +1,76 @@
 // RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
 
-// CHECK: %opencl.pipe_t = type opaque
-// CHECK: %opencl.reserve_id_t = type opaque
+// CHECK-DAG: %opencl.pipe_ro_t = type opaque
+// CHECK-DAG: %opencl.pipe_wo_t = type opaque
+// CHECK-DAG: %opencl.reserve_id_t = type opaque
 
 #pragma OPENCL EXTENSION cl_khr_subgroups : enable
 
 void test1(read_only pipe int p, global int *ptr) {
-  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
   read_pipe(p, ptr);
-  // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
   reserve_id_t rid = reserve_read_pipe(p, 2);
-  // CHECK: call i32 @__read_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  // CHECK: call i32 @__read_pipe_4(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
   read_pipe(p, rid, 2, ptr);
-  // CHECK: call void @__commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  // CHECK: call void @__commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
   commit_read_pipe(p, rid);
 }
 
 void test2(write_only pipe int p, global int *ptr) {
-  // CHECK: call i32 @__write_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  // CHECK: call i32 @__write_pipe_2(%opencl.pipe_wo_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
   write_pipe(p, ptr);
-  // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
   reserve_id_t rid = reserve_write_pipe(p, 2);
-  // CHECK: call i32 @__write_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+  // CHECK: call i32 @__write_pipe_4(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
   write_pipe(p, rid, 2, ptr);
-  // CHECK: call void @__commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  // CHECK: call void @__commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
   commit_write_pipe(p, rid);
 }
 
 void test3(read_only pipe int p, global int *ptr) {
-  // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
   reserve_id_t rid = work_group_reserve_read_pipe(p, 2);
-  // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
   work_group_commit_read_pipe(p, rid);
 }
 
 void test4(write_only pipe int p, global int *ptr) {
-  // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
   reserve_id_t rid = work_group_reserve_write_pipe(p, 2);
-  // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
   work_group_commit_write_pipe(p, rid);
 }
 
 void test5(read_only pipe int p, global int *ptr) {
-  // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
   reserve_id_t rid = sub_group_reserve_read_pipe(p, 2);
-  // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
   sub_group_commit_read_pipe(p, rid);
 }
 
 void test6(write_only pipe int p, global int *ptr) {
-  // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+  // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
   reserve_id_t rid = sub_group_reserve_write_pipe(p, 2);
-  // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+  // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
   sub_group_commit_write_pipe(p, rid);
 }
 
-void test7(write_only pipe int p, global int *ptr) {
-  // CHECK: call i32 @__get_pipe_num_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4)
+void test7(read_only pipe int p, global int *ptr) {
+  // CHECK: call i32 @__get_pipe_num_packets_ro(%opencl.pipe_ro_t* %{{.*}}, i32 4, i32 4)
   *ptr = get_pipe_num_packets(p);
-  // CHECK: call i32 @__get_pipe_max_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4)
+  // CHECK: call i32 @__get_pipe_max_packets_ro(%opencl.pipe_ro_t* %{{.*}}, i32 4, i32 4)
   *ptr = get_pipe_max_packets(p);
 }
 
-void test8(read_only pipe int r, write_only pipe int w, global int *ptr) {
+void test8(write_only pipe int p, global int *ptr) {
+  // CHECK: call i32 @__get_pipe_num_packets_wo(%opencl.pipe_wo_t* %{{.*}}, i32 4, i32 4)
+  *ptr = get_pipe_num_packets(p);
+  // CHECK: call i32 @__get_pipe_max_packets_wo(%opencl.pipe_wo_t* %{{.*}}, i32 4, i32 4)
+  *ptr = get_pipe_max_packets(p);
+}
+
+void test9(read_only pipe int r, write_only pipe int w, global int *ptr) {
   // verify that return type is correctly casted to i1 value
   // CHECK: %[[R:[0-9]+]] = call i32 @__read_pipe_2
   // CHECK: icmp ne i32 %[[R]], 0
@@ -70,10 +78,16 @@ void test8(read_only pipe int r, write_only pipe int w, global int *ptr) {
   // CHECK: %[[W:[0-9]+]] = call i32 @__write_pipe_2
   // CHECK: icmp ne i32 %[[W]], 0
   if (write_pipe(w, ptr)) *ptr = -1;
-  // CHECK: %[[N:[0-9]+]] = call i32 @__get_pipe_num_packets
-  // CHECK: icmp ne i32 %[[N]], 0
+  // CHECK: %[[NR:[0-9]+]] = call i32 @__get_pipe_num_packets_ro
+  // CHECK: icmp ne i32 %[[NR]], 0
   if (get_pipe_num_packets(r)) *ptr = -1;
-  // CHECK: %[[M:[0-9]+]] = call i32 @__get_pipe_max_packets
-  // CHECK: icmp ne i32 %[[M]], 0
+  // CHECK: %[[NW:[0-9]+]] = call i32 @__get_pipe_num_packets_wo
+  // CHECK: icmp ne i32 %[[NW]], 0
+  if (get_pipe_num_packets(w)) *ptr = -1;
+  // CHECK: %[[MR:[0-9]+]] = call i32 @__get_pipe_max_packets_ro
+  // CHECK: icmp ne i32 %[[MR]], 0
+  if (get_pipe_max_packets(r)) *ptr = -1;
+  // CHECK: %[[MW:[0-9]+]] = call i32 @__get_pipe_max_packets_wo
+  // CHECK: icmp ne i32 %[[MW]], 0
   if (get_pipe_max_packets(w)) *ptr = -1;
 }

+ 10 - 9
test/CodeGenOpenCL/pipe_types.cl

@@ -1,34 +1,35 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
 
-// CHECK: %opencl.pipe_t = type opaque
+// CHECK: %opencl.pipe_ro_t = type opaque
+// CHECK: %opencl.pipe_wo_t = type opaque
 typedef unsigned char __attribute__((ext_vector_type(3))) uchar3;
 typedef int __attribute__((ext_vector_type(4))) int4;
 
 void test1(read_only pipe int p) {
-// CHECK: define void @test1(%opencl.pipe_t* %p)
+// CHECK: define void @test1(%opencl.pipe_ro_t* %p)
   reserve_id_t rid;
 // CHECK: %rid = alloca %opencl.reserve_id_t
 }
 
 void test2(write_only pipe float p) {
-// CHECK: define void @test2(%opencl.pipe_t* %p)
+// CHECK: define void @test2(%opencl.pipe_wo_t* %p)
 }
 
 void test3(read_only pipe const int p) {
-// CHECK: define void @test3(%opencl.pipe_t* %p)
+// CHECK: define void @test3(%opencl.pipe_ro_t* %p)
 }
 
 void test4(read_only pipe uchar3 p) {
-// CHECK: define void @test4(%opencl.pipe_t* %p)
+// CHECK: define void @test4(%opencl.pipe_ro_t* %p)
 }
 
 void test5(read_only pipe int4 p) {
-// CHECK: define void @test5(%opencl.pipe_t* %p)
+// CHECK: define void @test5(%opencl.pipe_ro_t* %p)
 }
 
 typedef read_only pipe int MyPipe;
 kernel void test6(MyPipe p) {
-// CHECK: define spir_kernel void @test6(%opencl.pipe_t* %p)
+// CHECK: define spir_kernel void @test6(%opencl.pipe_ro_t* %p)
 }
 
 struct Person {
@@ -41,7 +42,7 @@ void test_reserved_read_pipe(global struct Person *SDst,
                              read_only pipe struct Person SPipe) {
 // CHECK: define void @test_reserved_read_pipe
   read_pipe (SPipe, SDst);
-  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
+  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
   read_pipe (SPipe, SDst);
-  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
+  // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
 }

+ 2 - 0
test/Driver/cxa-atexit.cpp

@@ -31,6 +31,8 @@
 // RUN: FileCheck --check-prefix=WITHATEXIT %s
 // RUN: %clang -target x86_64-apple-darwin -c -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=WITHATEXIT %s
+// RUN: %clang -target x86_64-apple-darwin -c -mkernel -### %s 2>&1 | \
+// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
 // RUN: %clang -target x86_64-pc-linux-gnu -fregister-global-dtors-with-atexit -fno-register-global-dtors-with-atexit -c -### %s 2>&1 | \
 // RUN: FileCheck --check-prefix=WITHOUTATEXIT %s
 // RUN: %clang -target x86_64-pc-linux-gnu -fno-register-global-dtors-with-atexit -fregister-global-dtors-with-atexit -c -### %s 2>&1 | \

+ 24 - 0
test/Driver/fast-math.c

@@ -287,3 +287,27 @@
 // RUN: %clang -### -ftrapping-math -fno-trapping-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-TRAPPING-MATH %s
 // CHECK-NO-TRAPPING-MATH: "-fno-trapping-math"
+
+// This isn't fast-math, but the option is handled in the same place as other FP params.
+// Last option wins, and the flag is *not* passed by default. 
+
+// RUN: %clang -### -ffp-cast-overflow-workaround -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FPOV-WORKAROUND %s
+// CHECK-FPOV-WORKAROUND: "-cc1"
+// CHECK-FPOV-WORKAROUND: "-ffp-cast-overflow-workaround"
+
+// RUN: %clang -### -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-FPOV-WORKAROUND-DEFAULT %s
+// CHECK-FPOV-WORKAROUND-DEFAULT: "-cc1"
+// CHECK-FPOV-WORKAROUND-DEFAULT-NOT: "-ffp-cast-overflow-workaround"
+
+// RUN: %clang -### -fno-fp-cast-overflow-workaround -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FPOV-WORKAROUND %s
+// CHECK-NO-FPOV-WORKAROUND: "-cc1"
+// CHECK-NO-FPOV-WORKAROUND-NOT: "-ffp-cast-overflow-workaround"
+
+// RUN: %clang -### -ffp-cast-overflow-workaround -fno-fp-cast-overflow-workaround -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FPOV-WORKAROUND-OVERRIDE %s
+// CHECK-NO-FPOV-WORKAROUND-OVERRIDE: "-cc1"
+// CHECK-NO-FPOV-WORKAROUND-OVERRIDE-NOT: "-ffp-cast-overflow-workaround"
+

+ 4 - 4
test/Index/pipe-size.cl

@@ -5,12 +5,12 @@
 __kernel void testPipe( pipe int test )
 {
     int s = sizeof(test);
-    // X86: store %opencl.pipe_t* %test, %opencl.pipe_t** %test.addr, align 8
+    // X86: store %opencl.pipe_ro_t* %test, %opencl.pipe_ro_t** %test.addr, align 8
     // X86: store i32 8, i32* %s, align 4
-    // SPIR: store %opencl.pipe_t addrspace(1)* %test, %opencl.pipe_t addrspace(1)** %test.addr, align 4
+    // SPIR: store %opencl.pipe_ro_t addrspace(1)* %test, %opencl.pipe_ro_t addrspace(1)** %test.addr, align 4
     // SPIR: store i32 4, i32* %s, align 4
-    // SPIR64: store %opencl.pipe_t addrspace(1)* %test, %opencl.pipe_t addrspace(1)** %test.addr, align 8
+    // SPIR64: store %opencl.pipe_ro_t addrspace(1)* %test, %opencl.pipe_ro_t addrspace(1)** %test.addr, align 8
     // SPIR64: store i32 8, i32* %s, align 4
-    // AMDGCN: store %opencl.pipe_t addrspace(1)* %test, %opencl.pipe_t addrspace(1)* addrspace(5)* %test.addr, align 8
+    // AMDGCN: store %opencl.pipe_ro_t addrspace(1)* %test, %opencl.pipe_ro_t addrspace(1)* addrspace(5)* %test.addr, align 8
     // AMDGCN: store i32 8, i32 addrspace(5)* %s, align 4
 }

+ 1 - 1
test/Misc/diag-format.c

@@ -37,7 +37,7 @@
 // DEFAULT: {{.*}}:36:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
 // MSVC2010: {{.*}}(36,7) : warning: extra tokens at end of #endif directive [-Wextra-tokens]
 // MSVC2013: {{.*}}(36,8) : warning: extra tokens at end of #endif directive [-Wextra-tokens]
-// MSVC: {{.*}}(36,8){{ ?}}: warning: extra tokens at end of #endif directive [-Wextra-tokens]
+// MSVC: {{.*\(36,[78]\) ?}}: warning: extra tokens at end of #endif directive [-Wextra-tokens]
 // MSVC2015: {{.*}}(36,8): warning: extra tokens at end of #endif directive [-Wextra-tokens]
 // VI: {{.*}} +36:8: warning: extra tokens at end of #endif directive [-Wextra-tokens]
 // MSVC2015_ORIG: {{.*}}(36): warning: extra tokens at end of #endif directive [-Wextra-tokens]

+ 5 - 0
test/Modules/Inputs/class-extension/a-private.h

@@ -0,0 +1,5 @@
+#import "a.h"
+#import "a-proto.h"
+
+@interface A () <AProto>
+@end

+ 7 - 0
test/Modules/Inputs/class-extension/a-proto.h

@@ -0,0 +1,7 @@
+@protocol NSObject
+@end
+
+@protocol AProto <NSObject>
+@property (nonatomic, readwrite, assign) int p0;
+@property (nonatomic, readwrite, assign) int p1;
+@end

+ 5 - 0
test/Modules/Inputs/class-extension/a.h

@@ -0,0 +1,5 @@
+@interface NSObject
+@end
+
+@interface A : NSObject
+@end

+ 11 - 0
test/Modules/Inputs/class-extension/module.modulemap

@@ -0,0 +1,11 @@
+
+module A {
+  header "a.h"
+  header "a-proto.h"
+  export *
+}
+
+module AP {
+  header "a-private.h"
+  export *
+}

+ 9 - 0
test/Modules/class-extension-protocol.m

@@ -0,0 +1,9 @@
+// RUN: rm -rf %t.cache
+// RUN: %clang_cc1 %s -fsyntax-only -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.cache -I%S/Inputs/class-extension -verify
+// expected-no-diagnostics
+
+#import "a-private.h"
+
+int foo(A *X) {
+  return X.p0 + X.p1;
+}

+ 3 - 2
test/Parser/cxx-decl.cpp

@@ -237,10 +237,11 @@ namespace PR5066 {
 namespace PR17255 {
 void foo() {
   typename A::template B<>; // expected-error {{use of undeclared identifier 'A'}}
+  // expected-error@-1 {{'template' keyword not permitted here}}
 #if __cplusplus <= 199711L
-  // expected-error@-2 {{'template' keyword outside of a template}}
+  // expected-error@-3 {{'template' keyword outside of a template}}
 #endif
-  // expected-error@-4 {{expected a qualified name after 'typename'}}
+  // expected-error@-5 {{expected a qualified name after 'typename'}}
 }
 }
 

+ 5 - 0
test/Preprocessor/arm-target-features.c

@@ -6,6 +6,7 @@
 // CHECK-V8A: #define __ARM_FEATURE_DIRECTED_ROUNDING 1
 // CHECK-V8A: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
 // CHECK-V8A-NOT: #define __ARM_FP 0x
+// CHECK-V8A-NOT: #define __ARM_FEATURE_DOTPROD
 
 // RUN: %clang -target armv8a-none-linux-gnueabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s
 // RUN: %clang -target armv8a-none-linux-gnueabihf -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8A-ALLOW-FP-INSTR %s
@@ -18,6 +19,7 @@
 // CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FP 0xe
 // CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FP16_ARGS 1
 // CHECK-V8A-ALLOW-FP-INSTR: #define __ARM_FP16_FORMAT_IEEE 1
+// CHECK-V8A-ALLOW-FP-INSTR-V8A-NOT: #define __ARM_FEATURE_DOTPROD
 
 // RUN: %clang -target arm-none-linux-gnueabi -march=armv8.2a+fp16 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-FULLFP16-VECTOR-SCALAR %s
 // CHECK-FULLFP16-VECTOR-SCALAR: #define __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
@@ -30,6 +32,9 @@
 // CHECK-FULLFP16-SCALAR-NOT:   #define __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 1
 // CHECK-FULLFP16-SCALAR:       #define __ARM_FP 0xe
 // CHECK-FULLFP16-SCALAR:       #define __ARM_FP16_FORMAT_IEEE 1
+//
+// RUN: %clang -target arm-none-linux-gnueabi -march=armv8.2a+dotprod -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-DOTPROD %s
+// CHECK-DOTPROD: #define __ARM_FEATURE_DOTPROD 1
 
 // RUN: %clang -target armv8r-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V8R %s
 // CHECK-V8R: #define __ARMEL__ 1

+ 5 - 0
test/Sema/builtins-x86.c

@@ -16,6 +16,11 @@ typedef unsigned char __mmask8;
 typedef unsigned short __mmask16;
 typedef unsigned int __mmask32;
 
+void call_x86_32_builtins(void) {
+  (void)__builtin_ia32_readeflags_u32();                             // expected-error{{this builtin is only available on 32-bit targets}}
+  (void)__builtin_ia32_writeeflags_u32(4);                           // expected-error{{this builtin is only available on 32-bit targets}}
+}
+
 __m128 test__builtin_ia32_cmpps(__m128 __a, __m128 __b) {
   __builtin_ia32_cmpps(__a, __b, 32); // expected-error {{argument should be a value from 0 to 31}}
 }

+ 4 - 4
test/SemaCXX/cxx1y-variable-templates_in_class.cpp

@@ -382,16 +382,16 @@ int main() {
 
 namespace dependent_static_var_template {
   struct A {
-    template<int = 0> static int n; // expected-note {{here}}
+    template<int = 0> static int n; // expected-note 2{{here}}
   };
-  int &r = A::template n; // FIXME: ill-formed
+  int &r = A::template n; // expected-error {{use of variable template 'n' requires template arguments}}
 
   template<typename T>
   int &f() { return T::template n; } // expected-error {{use of variable template 'n' requires template arguments}}
   int &s = f<A>(); // expected-note {{instantiation of}}
 
   namespace B {
-    template<int = 0> static int n;
+    template<int = 0> static int n; // expected-note {{here}}
   }
-  int &t = B::template n; // FIXME: ill-formed
+  int &t = B::template n; // expected-error {{use of variable template 'n' requires template arguments}}
 }

+ 46 - 0
test/SemaTemplate/template-id-expr.cpp

@@ -85,6 +85,52 @@ struct Y0 {
   }
 };
 
+template<typename U> void Y0
+  ::template // expected-error {{expected unqualified-id}}
+    f1(U) {}
+
+// FIXME: error recovery is awful without this.
+    ;
+
+template<typename T>
+struct Y1 {
+  template<typename U>
+  void f1(U);
+
+  template<typename U>
+  static void f2(U);
+
+  void f3(int);
+
+  static int f4(int);
+  template<typename U>
+  static void f4(U);
+
+  template<typename U>
+  void f() {
+    Y1::template f1<U>(0);
+    Y1::template f1(0);
+    this->template f1(0);
+
+    Y1::template f2<U>(0);
+    Y1::template f2(0);
+
+    Y1::template f3(0); // expected-error {{'f3' following the 'template' keyword does not refer to a template}}
+    Y1::template f3(); // expected-error {{'f3' following the 'template' keyword does not refer to a template}}
+
+    int x;
+    x = Y1::f4(0);
+    x = Y1::f4<int>(0); // expected-error {{assigning to 'int' from incompatible type 'void'}}
+    x = Y1::template f4(0); // expected-error {{assigning to 'int' from incompatible type 'void'}}
+
+    x = this->f4(0);
+    x = this->f4<int>(0); // expected-error {{assigning to 'int' from incompatible type 'void'}}
+    x = this->template f4(0); // expected-error {{assigning to 'int' from incompatible type 'void'}}
+  }
+};
+
+void use_Y1(Y1<int> y1) { y1.f<int>(); } // expected-note {{in instantiation of}}
+
 struct A {
   template<int I>
   struct B {

+ 18 - 13
tools/driver/cc1as_main.cpp

@@ -406,17 +406,19 @@ static bool ExecuteAssembler(AssemblerInvocation &Opts,
   if (Opts.OutputType == AssemblerInvocation::FT_Asm) {
     MCInstPrinter *IP = TheTarget->createMCInstPrinter(
         llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI);
-    MCCodeEmitter *CE = nullptr;
-    MCAsmBackend *MAB = nullptr;
-    if (Opts.ShowEncoding) {
-      CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
-      MCTargetOptions Options;
-      MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options);
-    }
+
+    std::unique_ptr<MCCodeEmitter> CE;
+    if (Opts.ShowEncoding)
+      CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+    MCTargetOptions MCOptions;
+    std::unique_ptr<MCAsmBackend> MAB(
+        TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
+
     auto FOut = llvm::make_unique<formatted_raw_ostream>(*Out);
     Str.reset(TheTarget->createAsmStreamer(
         Ctx, std::move(FOut), /*asmverbose*/ true,
-        /*useDwarfDirectory*/ true, IP, CE, MAB, Opts.ShowInst));
+        /*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB),
+        Opts.ShowInst));
   } else if (Opts.OutputType == AssemblerInvocation::FT_Null) {
     Str.reset(createNullStreamer(Ctx));
   } else {
@@ -427,13 +429,16 @@ static bool ExecuteAssembler(AssemblerInvocation &Opts,
       Out = BOS.get();
     }
 
-    MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
-    MCTargetOptions Options;
-    MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, Options);
+    std::unique_ptr<MCCodeEmitter> CE(
+        TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+    MCTargetOptions MCOptions;
+    std::unique_ptr<MCAsmBackend> MAB(
+        TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
+
     Triple T(Opts.Triple);
     Str.reset(TheTarget->createMCObjectStreamer(
-                T, Ctx, std::unique_ptr<MCAsmBackend>(MAB), *Out, std::unique_ptr<MCCodeEmitter>(CE), *STI,
-        Opts.RelaxAll, Opts.IncrementalLinkerCompatible,
+        T, Ctx, std::move(MAB), *Out, std::move(CE), *STI, Opts.RelaxAll,
+        Opts.IncrementalLinkerCompatible,
         /*DWARFMustBeAtTheEnd*/ true));
     Str.get()->InitSections(Opts.NoExecStack);
   }

+ 2 - 2
tools/driver/driver.cpp

@@ -485,7 +485,7 @@ int main(int argc_, const char **argv_) {
       // On Windows, abort will return an exit code of 3.  In these cases,
       // generate additional diagnostic information if possible.
       bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70;
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
       DiagnoseCrash |= CommandRes == 3;
 #endif
       if (DiagnoseCrash) {
@@ -501,7 +501,7 @@ int main(int argc_, const char **argv_) {
   // results now.  This happens in -disable-free mode.
   llvm::TimerGroup::printAll(llvm::errs());
 
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   // Exit status should not be negative on Win32, unless abnormal termination.
   // Once abnormal termiation was caught, negative status should not be
   // propagated.

+ 1 - 1
tools/libclang/CIndex.cpp

@@ -8472,7 +8472,7 @@ void cxindex::printDiagsToStderr(ASTUnit *Unit) {
     fprintf(stderr, "%s\n", clang_getCString(Msg));
     clang_disposeString(Msg);
   }
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   // On Windows, force a flush, since there may be multiple copies of
   // stderr and stdout in the file system, all with different buffers
   // but writing to the same device.

+ 3 - 3
tools/libclang/CIndexer.cpp

@@ -28,10 +28,10 @@
 #ifdef __CYGWIN__
 #include <cygwin/version.h>
 #include <sys/cygwin.h>
-#define LLVM_ON_WIN32 1
+#define _WIN32 1
 #endif
 
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
 #include <windows.h>
 #else
 #include <dlfcn.h>
@@ -47,7 +47,7 @@ const std::string &CIndexer::getClangResourcesPath() {
   SmallString<128> LibClangPath;
 
   // Find the location where this library lives (libclang.dylib).
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   MEMORY_BASIC_INFORMATION mbi;
   char path[MAX_PATH];
   VirtualQuery((void *)(uintptr_t)clang_createTranslationUnit, &mbi,

+ 2 - 2
unittests/ASTMatchers/ASTMatchersInternalTest.cpp

@@ -189,7 +189,7 @@ TEST(IsInlineMatcher, IsInline) {
 
 // FIXME: Figure out how to specify paths so the following tests pass on
 // Windows.
-#ifndef LLVM_ON_WIN32
+#ifndef _WIN32
 
 TEST(Matcher, IsExpansionInMainFileMatcher) {
   EXPECT_TRUE(matches("class X {};",
@@ -234,7 +234,7 @@ TEST(Matcher, IsExpansionInFileMatching) {
       "-isystem/", M));
 }
 
-#endif // LLVM_ON_WIN32
+#endif // _WIN32
 
 } // end namespace ast_matchers
 } // end namespace clang

+ 7 - 7
unittests/Basic/FileManagerTest.cpp

@@ -31,7 +31,7 @@ private:
   llvm::StringMap<FileData, llvm::BumpPtrAllocator> StatCalls;
 
   void InjectFileOrDirectory(const char *Path, ino_t INode, bool IsFile) {
-#ifndef LLVM_ON_WIN32
+#ifndef _WIN32
     SmallString<128> NormalizedPath(Path);
     llvm::sys::path::native(NormalizedPath);
     Path = NormalizedPath.c_str();
@@ -63,7 +63,7 @@ public:
   LookupResult getStat(StringRef Path, FileData &Data, bool isFile,
                        std::unique_ptr<vfs::File> *F,
                        vfs::FileSystem &FS) override {
-#ifndef LLVM_ON_WIN32
+#ifndef _WIN32
     SmallString<128> NormalizedPath(Path);
     llvm::sys::path::native(NormalizedPath);
     Path = NormalizedPath.c_str();
@@ -143,7 +143,7 @@ TEST_F(FileManagerTest, getFileReturnsValidFileEntryForExistingRealFile) {
   statCache->InjectDirectory("/tmp", 42);
   statCache->InjectFile("/tmp/test", 43);
 
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   const char *DirName = "C:.";
   const char *FileName = "C:test";
   statCache->InjectDirectory(DirName, 44);
@@ -161,7 +161,7 @@ TEST_F(FileManagerTest, getFileReturnsValidFileEntryForExistingRealFile) {
   ASSERT_TRUE(dir != nullptr);
   EXPECT_EQ("/tmp", dir->getName());
 
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   file = manager.getFile(FileName);
   ASSERT_TRUE(file != NULL);
 
@@ -225,7 +225,7 @@ TEST_F(FileManagerTest, getFileReturnsNULLForNonexistentFile) {
 
 // The following tests apply to Unix-like system only.
 
-#ifndef LLVM_ON_WIN32
+#ifndef _WIN32
 
 // getFile() returns the same FileEntry for real files that are aliases.
 TEST_F(FileManagerTest, getFileReturnsSameFileEntryForAliasedRealFiles) {
@@ -295,11 +295,11 @@ TEST_F(FileManagerTest, getVirtualFileWithDifferentName) {
   EXPECT_EQ(123, file2->getSize());
 }
 
-#endif  // !LLVM_ON_WIN32
+#endif  // !_WIN32
 
 TEST_F(FileManagerTest, makeAbsoluteUsesVFS) {
   SmallString<64> CustomWorkingDir;
-#ifdef LLVM_ON_WIN32
+#ifdef _WIN32
   CustomWorkingDir = "C:";
 #else
   CustomWorkingDir = "/";

+ 2 - 2
unittests/Driver/ToolChainTest.cpp

@@ -69,7 +69,7 @@ TEST(ToolChainTest, VFSGCCInstallation) {
     llvm::raw_string_ostream OS(S);
     C->getDefaultToolChain().printVerboseInfo(OS);
   }
-#if LLVM_ON_WIN32
+#if _WIN32
   std::replace(S.begin(), S.end(), '\\', '/');
 #endif
   EXPECT_EQ(
@@ -109,7 +109,7 @@ TEST(ToolChainTest, VFSGCCInstallationRelativeDir) {
     llvm::raw_string_ostream OS(S);
     C->getDefaultToolChain().printVerboseInfo(OS);
   }
-#if LLVM_ON_WIN32
+#if _WIN32
   std::replace(S.begin(), S.end(), '\\', '/');
 #endif
   EXPECT_EQ("Found candidate GCC installation: "

+ 12 - 1
unittests/Format/FormatTestObjC.cpp

@@ -945,7 +945,7 @@ TEST_F(FormatTestObjC, ObjCForIn) {
                "     }]) {\n}");
 }
 
-TEST_F(FormatTestObjC, ObjCNew) {
+TEST_F(FormatTestObjC, ObjCCxxKeywords) {
   verifyFormat("+ (instancetype)new {\n"
                "  return nil;\n"
                "}\n");
@@ -954,6 +954,17 @@ TEST_F(FormatTestObjC, ObjCNew) {
                "}\n");
   verifyFormat("SEL NewSelector(void) { return @selector(new); }\n");
   verifyFormat("SEL MacroSelector(void) { return MACRO(new); }\n");
+  verifyFormat("+ (instancetype)delete {\n"
+               "  return nil;\n"
+               "}\n");
+  verifyFormat("+ (instancetype)myDelete {\n"
+               "  return [self delete];\n"
+               "}\n");
+  verifyFormat("SEL DeleteSelector(void) { return @selector(delete); }\n");
+  verifyFormat("SEL MacroSelector(void) { return MACRO(delete); }\n");
+  verifyFormat("MACRO(new:)\n");
+  verifyFormat("MACRO(delete:)\n");
+  verifyFormat("foo = @{MACRO(new:) : MACRO(delete:)}\n");
 }
 
 TEST_F(FormatTestObjC, ObjCLiterals) {

+ 3 - 3
unittests/Tooling/RefactoringTest.cpp

@@ -1035,7 +1035,7 @@ TEST(DeduplicateByFileTest, PathsWithDots) {
   llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS(
       new vfs::InMemoryFileSystem());
   FileManager FileMgr(FileSystemOptions(), VFS);
-#if !defined(LLVM_ON_WIN32)
+#if !defined(_WIN32)
   StringRef Path1 = "a/b/.././c.h";
   StringRef Path2 = "a/c.h";
 #else
@@ -1056,7 +1056,7 @@ TEST(DeduplicateByFileTest, PathWithDotSlash) {
   llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS(
       new vfs::InMemoryFileSystem());
   FileManager FileMgr(FileSystemOptions(), VFS);
-#if !defined(LLVM_ON_WIN32)
+#if !defined(_WIN32)
   StringRef Path1 = "./a/b/c.h";
   StringRef Path2 = "a/b/c.h";
 #else
@@ -1077,7 +1077,7 @@ TEST(DeduplicateByFileTest, NonExistingFilePath) {
   llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS(
       new vfs::InMemoryFileSystem());
   FileManager FileMgr(FileSystemOptions(), VFS);
-#if !defined(LLVM_ON_WIN32)
+#if !defined(_WIN32)
   StringRef Path1 = "./a/b/c.h";
   StringRef Path2 = "a/b/c.h";
 #else

+ 2 - 2
unittests/Tooling/ToolingTest.cpp

@@ -216,7 +216,7 @@ struct VerifyEndCallback : public SourceFileCallbacks {
   bool Matched;
 };
 
-#if !defined(LLVM_ON_WIN32)
+#if !defined(_WIN32)
 TEST(newFrontendActionFactory, InjectsSourceFileCallbacks) {
   VerifyEndCallback EndCallback;
 
@@ -531,7 +531,7 @@ TEST(addTargetAndModeForProgramName, IgnoresExistingMode) {
             ArgsAlt);
 }
 
-#ifndef LLVM_ON_WIN32
+#ifndef _WIN32
 TEST(ClangToolTest, BuildASTs) {
   FixedCompilationDatabase Compilations("/", std::vector<std::string>());
 

+ 13 - 0
utils/TableGen/NeonEmitter.cpp

@@ -995,6 +995,19 @@ void Type::applyModifier(char Mod) {
     if (!AppliedQuad)
       Bitwidth *= 2;
     break;
+  case '7':
+    if (AppliedQuad)
+      Bitwidth /= 2;
+    ElementBitwidth = 8;
+    break;
+  case '8':
+    ElementBitwidth = 8;
+    break;
+  case '9':
+    if (!AppliedQuad)
+      Bitwidth *= 2;
+    ElementBitwidth = 8;
+    break;
   default:
     llvm_unreachable("Unhandled character!");
   }