há 10 anos atrás · c5920d0289
--- a/include/clang/Driver/Action.h
+++ b/include/clang/Driver/Action.h
@@ -41,8 +41,6 @@ public:
 
				   enum ActionClass {
			
 
				     InputClass = 0,
			
 
				     BindArchClass,
			
 
				-    CudaDeviceClass,
			
 
				-    CudaHostClass,
			
 
				     PreprocessJobClass,
			
 
				     PrecompileJobClass,
			
 
				     AnalyzeJobClass,
			
@@ -135,41 +133,6 @@ public:
 
				   }
			
 
				 };
			
 
				 
			
 
				-class CudaDeviceAction : public Action {
			
 
				-  virtual void anchor();
			
 
				-  /// GPU architecture to bind -- e.g 'sm_35'.
			
 
				-  const char *GpuArchName;
			
 
				-  /// True when action results are not consumed by the host action (e.g when
			
 
				-  /// -fsyntax-only or --cuda-device-only options are used).
			
 
				-  bool AtTopLevel;
			
 
				-
			
 
				-public:
			
 
				-  CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName,
			
 
				-                   bool AtTopLevel);
			
 
				-
			
 
				-  const char *getGpuArchName() const { return GpuArchName; }
			
 
				-  bool isAtTopLevel() const { return AtTopLevel; }
			
 
				-
			
 
				-  static bool classof(const Action *A) {
			
 
				-    return A->getKind() == CudaDeviceClass;
			
 
				-  }
			
 
				-};
			
 
				-
			
 
				-class CudaHostAction : public Action {
			
 
				-  virtual void anchor();
			
 
				-  ActionList DeviceActions;
			
 
				-
			
 
				-public:
			
 
				-  CudaHostAction(std::unique_ptr<Action> Input,
			
 
				-                 const ActionList &DeviceActions);
			
 
				-  ~CudaHostAction() override;
			
 
				-
			
 
				-  ActionList &getDeviceActions() { return DeviceActions; }
			
 
				-  const ActionList &getDeviceActions() const { return DeviceActions; }
			
 
				-
			
 
				-  static bool classof(const Action *A) { return A->getKind() == CudaHostClass; }
			
 
				-};
			
 
				-
			
 
				 class JobAction : public Action {
			
 
				   virtual void anchor();
			
 
				 protected:
			
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -351,12 +351,6 @@ def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group<clang_i_Group>,
 
				   MetaVarName<"<directory>">;
			
 
				 def c : Flag<["-"], "c">, Flags<[DriverOption]>,
			
 
				   HelpText<"Only run preprocess, compile, and assemble steps">;
			
 
				-def cuda_device_only : Flag<["--"], "cuda-device-only">,
			
 
				-  HelpText<"Do device-side CUDA compilation only">;
			
 
				-def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">,
			
 
				-  Flags<[DriverOption, HelpHidden]>, HelpText<"CUDA GPU architecture">;
			
 
				-def cuda_host_only : Flag<["--"], "cuda-host-only">,
			
 
				-  HelpText<"Do host-side CUDA compilation only">;
			
 
				 def dA : Flag<["-"], "dA">, Group<d_Group>;
			
 
				 def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
			
 
				   HelpText<"Print macro definitions in -E mode in addition to normal output">;
			
--- a/include/clang/Driver/Types.def
+++ b/include/clang/Driver/Types.def
@@ -44,7 +44,6 @@ TYPE("c",                        C,            PP_C,            "c",     "u")
 
				 TYPE("cl",                       CL,           PP_C,            "cl",    "u")
			
 
				 TYPE("cuda-cpp-output",          PP_CUDA,      INVALID,         "cui",   "u")
			
 
				 TYPE("cuda",                     CUDA,         PP_CUDA,         "cu",    "u")
			
 
				-TYPE("cuda",                     CUDA_DEVICE,  PP_CUDA,         "cu",    "")
			
 
				 TYPE("objective-c-cpp-output",   PP_ObjC,      INVALID,         "mi",    "u")
			
 
				 TYPE("objc-cpp-output",          PP_ObjC_Alias, INVALID,        "mi",    "u")
			
 
				 TYPE("objective-c",              ObjC,         PP_ObjC,         "m",     "u")
			
--- a/include/clang/Driver/Types.h
+++ b/include/clang/Driver/Types.h
@@ -63,9 +63,6 @@ namespace types {
 
				   /// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and headers).
			
 
				   bool isCXX(ID Id);
			
 
				 
			
 
				-  /// isCuda - Is this a CUDA input.
			
 
				-  bool isCuda(ID Id);
			
 
				-
			
 
				   /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and headers).
			
 
				   bool isObjC(ID Id);
			
 
				 
			
--- a/lib/Driver/Action.cpp
+++ b/lib/Driver/Action.cpp
@@ -24,8 +24,6 @@ const char *Action::getClassName(ActionClass AC) {
 
				   switch (AC) {
			
 
				   case InputClass: return "input";
			
 
				   case BindArchClass: return "bind-arch";
			
 
				-  case CudaDeviceClass: return "cuda-device";
			
 
				-  case CudaHostClass: return "cuda-host";
			
 
				   case PreprocessJobClass: return "preprocessor";
			
 
				   case PrecompileJobClass: return "precompiler";
			
 
				   case AnalyzeJobClass: return "analyzer";
			
@@ -55,25 +53,6 @@ BindArchAction::BindArchAction(std::unique_ptr<Action> Input,
 
				                                const char *_ArchName)
			
 
				     : Action(BindArchClass, std::move(Input)), ArchName(_ArchName) {}
			
 
				 
			
 
				-void CudaDeviceAction::anchor() {}
			
 
				-
			
 
				-CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
			
 
				-                                   const char *ArchName, bool AtTopLevel)
			
 
				-    : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName),
			
 
				-      AtTopLevel(AtTopLevel) {}
			
 
				-
			
 
				-void CudaHostAction::anchor() {}
			
 
				-
			
 
				-CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input,
			
 
				-                               const ActionList &_DeviceActions)
			
 
				-    : Action(CudaHostClass, std::move(Input)), DeviceActions(_DeviceActions) {}
			
 
				-
			
 
				-CudaHostAction::~CudaHostAction() {
			
 
				-  for (iterator it = DeviceActions.begin(), ie = DeviceActions.end(); it != ie;
			
 
				-       ++it)
			
 
				-    delete *it;
			
 
				-}
			
 
				-
			
 
				 void JobAction::anchor() {}
			
 
				 
			
 
				 JobAction::JobAction(ActionClass Kind, std::unique_ptr<Action> Input,
			
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -174,10 +174,8 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
 
				   } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
			
 
				     FinalPhase = phases::Backend;
			
 
				 
			
 
				-    // -c and partial CUDA compilations only run up to the assembler.
			
 
				-  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) ||
			
 
				-             (PhaseArg = DAL.getLastArg(options::OPT_cuda_device_only)) ||
			
 
				-             (PhaseArg = DAL.getLastArg(options::OPT_cuda_host_only))) {
			
 
				+    // -c only runs up to the assembler.
			
 
				+  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
			
 
				     FinalPhase = phases::Assemble;
			
 
				 
			
 
				     // Otherwise do everything.
			
@@ -902,20 +900,9 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
 
				   } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
			
 
				     os << '"' << BIA->getArchName() << '"' << ", {"
			
 
				        << PrintActions1(C, *BIA->begin(), Ids) << "}";
			
 
				-  } else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
			
 
				-    os << '"' << CDA->getGpuArchName() << '"' << ", {"
			
 
				-       << PrintActions1(C, *CDA->begin(), Ids) << "}";
			
 
				   } else {
			
 
				-    ActionList *AL;
			
 
				-    if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
			
 
				-      os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
			
 
				-         << ", gpu binaries ";
			
 
				-      AL = &CHA->getDeviceActions();
			
 
				-    } else
			
 
				-      AL = &A->getInputs();
			
 
				-
			
 
				     const char *Prefix = "{";
			
 
				-    for (Action *PreRequisite : *AL) {
			
 
				+    for (Action *PreRequisite : *A) {
			
 
				       os << Prefix << PrintActions1(C, PreRequisite, Ids);
			
 
				       Prefix = ", ";
			
 
				     }
			
@@ -1228,93 +1215,6 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
 
				   }
			
 
				 }
			
 
				 
			
 
				-// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE input
			
 
				-// action and then wraps each in CudaDeviceAction paired with appropriate GPU
			
 
				-// arch name. If we're only building device-side code, each action remains
			
 
				-// independent. Otherwise we pass device-side actions as inputs to a new
			
 
				-// CudaHostAction which combines both host and device side actions.
			
 
				-static std::unique_ptr<Action>
			
 
				-buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
			
 
				-                 const Arg *InputArg, const types::ID InputType,
			
 
				-                 std::unique_ptr<Action> Current, ActionList &Actions) {
			
 
				-
			
 
				-  assert(InputType == types::TY_CUDA &&
			
 
				-         "CUDA Actions only apply to CUDA inputs.");
			
 
				-
			
 
				-  // Collect all cuda_gpu_arch parameters, removing duplicates.
			
 
				-  SmallVector<const char *, 4> GpuArchList;
			
 
				-  llvm::StringSet<> GpuArchNames;
			
 
				-  for (Arg *A : Args) {
			
 
				-    if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) {
			
 
				-      A->claim();
			
 
				-      if (GpuArchNames.insert(A->getValue()).second)
			
 
				-        GpuArchList.push_back(A->getValue());
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // Default to sm_20 which is the lowest common denominator for supported GPUs.
			
 
				-  // sm_20 code should work correctly, if suboptimally, on all newer GPUs.
			
 
				-  if (GpuArchList.empty())
			
 
				-    GpuArchList.push_back("sm_20");
			
 
				-
			
 
				-  // Replicate inputs for each GPU architecture.
			
 
				-  Driver::InputList CudaDeviceInputs;
			
 
				-  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
			
 
				-    CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
			
 
				-
			
 
				-  // Build actions for all device inputs.
			
 
				-  ActionList CudaDeviceActions;
			
 
				-  D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
			
 
				-  assert(GpuArchList.size() == CudaDeviceActions.size() &&
			
 
				-         "Failed to create actions for all devices");
			
 
				-
			
 
				-  // Check whether any of device actions stopped before they could generate PTX.
			
 
				-  bool PartialCompilation = false;
			
 
				-  bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only);
			
 
				-  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) {
			
 
				-    if (CudaDeviceActions[i]->getKind() != Action::BackendJobClass) {
			
 
				-      PartialCompilation = true;
			
 
				-      break;
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  // Figure out what to do with device actions -- pass them as inputs to the
			
 
				-  // host action or run each of them independently.
			
 
				-  if (PartialCompilation || DeviceOnlyCompilation) {
			
 
				-    // In case of partial or device-only compilation results of device actions
			
 
				-    // are not consumed by the host action device actions have to be added to
			
 
				-    // top-level actions list with AtTopLevel=true and run independently.
			
 
				-
			
 
				-    // -o is ambiguous if we have more than one top-level action.
			
 
				-    if (Args.hasArg(options::OPT_o) &&
			
 
				-        (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
			
 
				-      D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
			
 
				-      return nullptr;
			
 
				-    }
			
 
				-
			
 
				-    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
			
 
				-      Actions.push_back(
			
 
				-          new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
			
 
				-                               GpuArchList[i], /* AtTopLevel */ true));
			
 
				-    // Kill host action in case of device-only compilation.
			
 
				-    if (DeviceOnlyCompilation)
			
 
				-      Current.reset(nullptr);
			
 
				-    return Current;
			
 
				-  } else {
			
 
				-    // Outputs of device actions during complete CUDA compilation get created
			
 
				-    // with AtTopLevel=false and become inputs for the host action.
			
 
				-    ActionList DeviceActions;
			
 
				-    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
			
 
				-      DeviceActions.push_back(
			
 
				-          new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
			
 
				-                               GpuArchList[i], /* AtTopLevel */ false));
			
 
				-    // Return a new host action that incorporates original host action and all
			
 
				-    // device actions.
			
 
				-    return std::unique_ptr<Action>(
			
 
				-        new CudaHostAction(std::move(Current), DeviceActions));
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				 void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
			
 
				                           const InputList &Inputs, ActionList &Actions) const {
			
 
				   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
			
@@ -1412,25 +1312,6 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
 
				       continue;
			
 
				     }
			
 
				 
			
 
				-    phases::ID CudaInjectionPhase;
			
 
				-    if (isSaveTempsEnabled()) {
			
 
				-      // All phases are done independently, inject GPU blobs during compilation
			
 
				-      // phase as that's where we generate glue code to init them.
			
 
				-      CudaInjectionPhase = phases::Compile;
			
 
				-    } else {
			
 
				-      // Assumes that clang does everything up until linking phase, so we inject
			
 
				-      // cuda device actions at the last step before linking. Otherwise CUDA
			
 
				-      // host action forces preprocessor into a separate invocation.
			
 
				-      if (FinalPhase == phases::Link) {
			
 
				-        for (auto i = PL.begin(), e = PL.end(); i != e; ++i) {
			
 
				-          auto next = i + 1;
			
 
				-          if (next != e && *next == phases::Link)
			
 
				-            CudaInjectionPhase = *i;
			
 
				-        }
			
 
				-      } else
			
 
				-        CudaInjectionPhase = FinalPhase;
			
 
				-    }
			
 
				-
			
 
				     // Build the pipeline for this file.
			
 
				     std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType));
			
 
				     for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
			
@@ -1456,15 +1337,6 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
 
				 
			
 
				       // Otherwise construct the appropriate action.
			
 
				       Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
			
 
				-
			
 
				-      if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase &&
			
 
				-          !Args.hasArg(options::OPT_cuda_host_only)) {
			
 
				-        Current = buildCudaActions(*this, TC, Args, InputArg, InputType,
			
 
				-                                   std::move(Current), Actions);
			
 
				-        if (!Current)
			
 
				-          break;
			
 
				-      }
			
 
				-
			
 
				       if (Current->getType() == types::TY_Nothing)
			
 
				         break;
			
 
				     }
			
@@ -1704,13 +1576,7 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
 
				   if (isa<BackendJobAction>(JA)) {
			
 
				     // Check if the compiler supports emitting LLVM IR.
			
 
				     assert(Inputs->size() == 1);
			
 
				-    JobAction *CompileJA;
			
 
				-    // Extract real host action, if it's a CudaHostAction.
			
 
				-    if (CudaHostAction *CudaHA = dyn_cast<CudaHostAction>(*Inputs->begin()))
			
 
				-      CompileJA = cast<CompileJobAction>(*CudaHA->begin());
			
 
				-    else
			
 
				-      CompileJA = cast<CompileJobAction>(*Inputs->begin());
			
 
				-
			
 
				+    JobAction *CompileJA = cast<CompileJobAction>(*Inputs->begin());
			
 
				     const Tool *Compiler = TC->SelectTool(*CompileJA);
			
 
				     if (!Compiler)
			
 
				       return nullptr;
			
@@ -1744,20 +1610,6 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
 
				                                 InputInfo &Result) const {
			
 
				   llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
			
 
				 
			
 
				-  InputInfoList CudaDeviceInputInfos;
			
 
				-  if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
			
 
				-    InputInfo II;
			
 
				-    // Append outputs of device jobs to the input list.
			
 
				-    for (const Action *DA : CHA->getDeviceActions()) {
			
 
				-      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
			
 
				-                         /*MultipleArchs*/ false, LinkingOutput, II);
			
 
				-      CudaDeviceInputInfos.push_back(II);
			
 
				-    }
			
 
				-    // Override current action with a real host compile action and continue
			
 
				-    // processing it.
			
 
				-    A = *CHA->begin();
			
 
				-  }
			
 
				-
			
 
				   if (const InputAction *IA = dyn_cast<InputAction>(A)) {
			
 
				     // FIXME: It would be nice to not claim this here; maybe the old scheme of
			
 
				     // just using Args was better?
			
@@ -1783,24 +1635,11 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
 
				     else
			
 
				       TC = &C.getDefaultToolChain();
			
 
				 
			
 
				-    BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel,
			
 
				+    BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), AtTopLevel,
			
 
				                        MultipleArchs, LinkingOutput, Result);
			
 
				     return;
			
 
				   }
			
 
				 
			
 
				-  if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
			
 
				-    // Figure out which NVPTX triple to use for device-side compilation based on
			
 
				-    // whether host is 64-bit.
			
 
				-    llvm::Triple DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit()
			
 
				-                                  ? "nvptx64-nvidia-cuda"
			
 
				-                                  : "nvptx-nvidia-cuda");
			
 
				-    BuildJobsForAction(C, *CDA->begin(),
			
 
				-                       &getToolChain(C.getArgs(), DeviceTriple),
			
 
				-                       CDA->getGpuArchName(), CDA->isAtTopLevel(),
			
 
				-                       /*MultipleArchs*/ true, LinkingOutput, Result);
			
 
				-    return;
			
 
				-  }
			
 
				-
			
 
				   const ActionList *Inputs = &A->getInputs();
			
 
				 
			
 
				   const JobAction *JA = cast<JobAction>(A);
			
@@ -1832,10 +1671,6 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
 
				   if (JA->getType() == types::TY_dSYM)
			
 
				     BaseInput = InputInfos[0].getFilename();
			
 
				 
			
 
				-  // Append outputs of cuda device jobs to the input list
			
 
				-  if (CudaDeviceInputInfos.size())
			
 
				-    InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end());
			
 
				-
			
 
				   // Determine the place to write output to, if any.
			
 
				   if (JA->getType() == types::TY_Nothing)
			
 
				     Result = InputInfo(A->getType(), BaseInput);
			
@@ -2217,9 +2052,6 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
 
				         break;
			
 
				       }
			
 
				       break;
			
 
				-    case llvm::Triple::CUDA:
			
 
				-      TC = new toolchains::CudaToolChain(*this, Target, Args);
			
 
				-      break;
			
 
				     default:
			
 
				       // Of these targets, Hexagon is the only one that might have
			
 
				       // an OS of Linux, in which case it got handled above already.
			
--- a/lib/Driver/ToolChain.cpp
+++ b/lib/Driver/ToolChain.cpp
@@ -151,8 +151,6 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const {
 
				 
			
 
				   case Action::InputClass:
			
 
				   case Action::BindArchClass:
			
 
				-  case Action::CudaDeviceClass:
			
 
				-  case Action::CudaHostClass:
			
 
				   case Action::LipoJobClass:
			
 
				   case Action::DsymutilJobClass:
			
 
				   case Action::VerifyDebugInfoJobClass:
			
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@@ -3652,65 +3652,6 @@ Tool *DragonFly::buildLinker() const {
 
				   return new tools::dragonfly::Linker(*this);
			
 
				 }
			
 
				 
			
 
				-/// Stub for CUDA toolchain. At the moment we don't have assembler or
			
 
				-/// linker and need toolchain mainly to propagate device-side options
			
 
				-/// to CC1.
			
 
				-
			
 
				-CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
			
 
				-                             const ArgList &Args)
			
 
				-    : Linux(D, Triple, Args) {}
			
 
				-
			
 
				-void
			
 
				-CudaToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
			
 
				-                                     llvm::opt::ArgStringList &CC1Args) const {
			
 
				-  Linux::addClangTargetOptions(DriverArgs, CC1Args);
			
 
				-  CC1Args.push_back("-fcuda-is-device");
			
 
				-}
			
 
				-
			
 
				-llvm::opt::DerivedArgList *
			
 
				-CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
			
 
				-                             const char *BoundArch) const {
			
 
				-  DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
			
 
				-  const OptTable &Opts = getDriver().getOpts();
			
 
				-
			
 
				-  for (Arg *A : Args) {
			
 
				-    if (A->getOption().matches(options::OPT_Xarch__)) {
			
 
				-      // Skip this argument unless the architecture matches BoundArch
			
 
				-      if (A->getValue(0) != StringRef(BoundArch))
			
 
				-        continue;
			
 
				-
			
 
				-      unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
			
 
				-      unsigned Prev = Index;
			
 
				-      std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
			
 
				-
			
 
				-      // If the argument parsing failed or more than one argument was
			
 
				-      // consumed, the -Xarch_ argument's parameter tried to consume
			
 
				-      // extra arguments. Emit an error and ignore.
			
 
				-      //
			
 
				-      // We also want to disallow any options which would alter the
			
 
				-      // driver behavior; that isn't going to work in our model. We
			
 
				-      // use isDriverOption() as an approximation, although things
			
 
				-      // like -O4 are going to slip through.
			
 
				-      if (!XarchArg || Index > Prev + 1) {
			
 
				-        getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
			
 
				-            << A->getAsString(Args);
			
 
				-        continue;
			
 
				-      } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
			
 
				-        getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
			
 
				-            << A->getAsString(Args);
			
 
				-        continue;
			
 
				-      }
			
 
				-      XarchArg->setBaseArg(A);
			
 
				-      A = XarchArg.release();
			
 
				-      DAL->AddSynthesizedArg(A);
			
 
				-    }
			
 
				-    DAL->append(A);
			
 
				-  }
			
 
				-
			
 
				-  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
			
 
				-  return DAL;
			
 
				-}
			
 
				-
			
 
				 /// XCore tool chain
			
 
				 XCore::XCore(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
			
 
				     : ToolChain(D, Triple, Args) {
			
--- a/lib/Driver/ToolChains.h
+++ b/lib/Driver/ToolChains.h
@@ -699,18 +699,6 @@ private:
 
				   std::string computeSysRoot() const;
			
 
				 };
			
 
				 
			
 
				-class LLVM_LIBRARY_VISIBILITY CudaToolChain : public Linux {
			
 
				-public:
			
 
				-  CudaToolChain(const Driver &D, const llvm::Triple &Triple,
			
 
				-                const llvm::opt::ArgList &Args);
			
 
				-
			
 
				-  llvm::opt::DerivedArgList *
			
 
				-  TranslateArgs(const llvm::opt::DerivedArgList &Args,
			
 
				-                const char *BoundArch) const override;
			
 
				-  void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
			
 
				-                             llvm::opt::ArgStringList &CC1Args) const override;
			
 
				-};
			
 
				-
			
 
				 class LLVM_LIBRARY_VISIBILITY Hexagon_TC : public Linux {
			
 
				 protected:
			
 
				   GCCVersion GCCLibAndIncVersion;
			
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -1488,12 +1488,6 @@ static std::string getCPUName(const ArgList &Args, const llvm::Triple &T) {
 
				     return CPUName;
			
 
				   }
			
 
				 
			
 
				-  case llvm::Triple::nvptx:
			
 
				-  case llvm::Triple::nvptx64:
			
 
				-    if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
			
 
				-      return A->getValue();
			
 
				-    return "";
			
 
				-
			
 
				   case llvm::Triple::ppc:
			
 
				   case llvm::Triple::ppc64:
			
 
				   case llvm::Triple::ppc64le: {
			
@@ -2832,14 +2826,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
				       getToolChain().getTriple().isWindowsCygwinEnvironment();
			
 
				   bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
			
 
				 
			
 
				-  // Check number of inputs for sanity. We need at least one input.
			
 
				-  assert(Inputs.size() >= 1 && "Must have at least one input.");
			
 
				+  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
			
 
				   const InputInfo &Input = Inputs[0];
			
 
				-  // CUDA compilation may have multiple inputs (source file + results of
			
 
				-  // device-side compilations). All other jobs are expected to have exactly one
			
 
				-  // input.
			
 
				-  bool IsCuda = types::isCuda(Input.getType());
			
 
				-  assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs.");
			
 
				 
			
 
				   // Invoke ourselves in -cc1 mode.
			
 
				   //
			
@@ -4814,12 +4802,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
				     assert(Output.isNothing() && "Invalid output.");
			
 
				   }
			
 
				 
			
 
				-  addDashXForInput(Args, Input, CmdArgs);
			
 
				+  for (const auto &II : Inputs) {
			
 
				+    addDashXForInput(Args, II, CmdArgs);
			
 
				 
			
 
				-  if (Input.isFilename())
			
 
				-    CmdArgs.push_back(Input.getFilename());
			
 
				-  else
			
 
				-    Input.getInputArg().renderAsInput(Args, CmdArgs);
			
 
				+    if (II.isFilename())
			
 
				+      CmdArgs.push_back(II.getFilename());
			
 
				+    else
			
 
				+      II.getInputArg().renderAsInput(Args, CmdArgs);
			
 
				+  }
			
 
				 
			
 
				   Args.AddAllArgs(CmdArgs, options::OPT_undef);
			
 
				 
			
@@ -4857,16 +4847,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
				     CmdArgs.push_back(SplitDwarfOut);
			
 
				   }
			
 
				 
			
 
				-  // Host-side cuda compilation receives device-side outputs as Inputs[1...].
			
 
				-  // Include them with -fcuda-include-gpubinary.
			
 
				-  if (IsCuda && Inputs.size() > 1)
			
 
				-    for (InputInfoList::const_iterator it = std::next(Inputs.begin()),
			
 
				-                                       ie = Inputs.end();
			
 
				-         it != ie; ++it) {
			
 
				-      CmdArgs.push_back("-fcuda-include-gpubinary");
			
 
				-      CmdArgs.push_back(it->getFilename());
			
 
				-    }
			
 
				-
			
 
				   // Finally add the compile command to the compilation.
			
 
				   if (Args.hasArg(options::OPT__SLASH_fallback) &&
			
 
				       Output.getType() == types::TY_Object &&
			
--- a/lib/Driver/Types.cpp
+++ b/lib/Driver/Types.cpp
@@ -86,7 +86,6 @@ bool types::isAcceptedByClang(ID Id) {
 
				   case TY_C: case TY_PP_C:
			
 
				   case TY_CL:
			
 
				   case TY_CUDA: case TY_PP_CUDA:
			
 
				-  case TY_CUDA_DEVICE:
			
 
				   case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias:
			
 
				   case TY_CXX: case TY_PP_CXX:
			
 
				   case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias:
			
@@ -123,19 +122,7 @@ bool types::isCXX(ID Id) {
 
				   case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias:
			
 
				   case TY_CXXHeader: case TY_PP_CXXHeader:
			
 
				   case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader:
			
 
				-  case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE:
			
 
				-    return true;
			
 
				-  }
			
 
				-}
			
 
				-
			
 
				-bool types::isCuda(ID Id) {
			
 
				-  switch (Id) {
			
 
				-  default:
			
 
				-    return false;
			
 
				-
			
 
				-  case TY_CUDA:
			
 
				-  case TY_PP_CUDA:
			
 
				-  case TY_CUDA_DEVICE:
			
 
				+  case TY_CUDA: case TY_PP_CUDA:
			
 
				     return true;
			
 
				   }
			
 
				 }
			
@@ -219,12 +206,10 @@ void types::getCompilationPhases(ID Id, llvm::SmallVectorImpl<phases::ID> &P) {
 
				         P.push_back(phases::Compile);
			
 
				         P.push_back(phases::Backend);
			
 
				       }
			
 
				-      if (Id != TY_CUDA_DEVICE)
			
 
				-        P.push_back(phases::Assemble);
			
 
				+      P.push_back(phases::Assemble);
			
 
				     }
			
 
				   }
			
 
				-
			
 
				-  if (!onlyPrecompileType(Id) && Id != TY_CUDA_DEVICE) {
			
 
				+  if (!onlyPrecompileType(Id)) {
			
 
				     P.push_back(phases::Link);
			
 
				   }
			
 
				   assert(0 < P.size() && "Not enough phases in list");
			
--- a/lib/Frontend/CreateInvocationFromCommandLine.cpp
+++ b/lib/Frontend/CreateInvocationFromCommandLine.cpp
@@ -15,7 +15,6 @@
 
				 #include "clang/Basic/DiagnosticOptions.h"
			
 
				 #include "clang/Driver/Compilation.h"
			
 
				 #include "clang/Driver/Driver.h"
			
 
				-#include "clang/Driver/Action.h"
			
 
				 #include "clang/Driver/Options.h"
			
 
				 #include "clang/Driver/Tool.h"
			
 
				 #include "clang/Frontend/CompilerInstance.h"
			
@@ -62,21 +61,9 @@ clang::createInvocationFromCommandLine(ArrayRef<const char *> ArgList,
 
				   }
			
 
				 
			
 
				   // We expect to get back exactly one command job, if we didn't something
			
 
				-  // failed. CUDA compilation is an exception as it creates multiple jobs. If
			
 
				-  // that's the case, we proceed with the first job. If caller needs particular
			
 
				-  // CUDA job, it should be controlled via --cuda-{host|device}-only option
			
 
				-  // passed to the driver.
			
 
				+  // failed.
			
 
				   const driver::JobList &Jobs = C->getJobs();
			
 
				-  bool CudaCompilation = false;
			
 
				-  if (Jobs.size() > 1) {
			
 
				-    for (auto &A : C->getActions())
			
 
				-      if (isa<driver::CudaDeviceAction>(A)) {
			
 
				-        CudaCompilation = true;
			
 
				-        break;
			
 
				-      }
			
 
				-  }
			
 
				-  if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) ||
			
 
				-      (Jobs.size() > 1 && !CudaCompilation)) {
			
 
				+  if (Jobs.size() != 1 || !isa<driver::Command>(*Jobs.begin())) {
			
 
				     SmallString<256> Msg;
			
 
				     llvm::raw_svector_ostream OS(Msg);
			
 
				     Jobs.Print(OS, "; ", true);
			
--- a/test/Driver/cuda-options.cu
+++ b/test/Driver/cuda-options.cu
@@ -1,109 +0,0 @@
 
				-// Tests CUDA compilation pipeline construction in Driver.
			
 
				-// REQUIRES: clang-driver
			
 
				-
			
 
				-// Simple compilation case:
			
 
				-// RUN: %clang -### -c %s 2>&1 \
			
 
				-// Compile device-side to PTX assembly and make sure we use it on the host side.
			
 
				-// RUN:   | FileCheck -check-prefix CUDA-D1 \
			
 
				-// Then compile host side and incorporate device code.
			
 
				-// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
			
 
				-// Make sure we don't link anything.
			
 
				-// RUN:   -check-prefix CUDA-NL %s
			
 
				-
			
 
				-// Typical compilation + link case:
			
 
				-// RUN: %clang -### %s 2>&1 \
			
 
				-// Compile device-side to PTX assembly and make sure we use it on the host side
			
 
				-// RUN:   | FileCheck -check-prefix CUDA-D1 \
			
 
				-// Then compile host side and incorporate device code.
			
 
				-// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
			
 
				-// Then link things.
			
 
				-// RUN:   -check-prefix CUDA-L %s
			
 
				-
			
 
				-// Verify that -cuda-no-device disables device-side compilation and linking
			
 
				-// RUN: %clang -### --cuda-host-only %s 2>&1 \
			
 
				-// Make sure we didn't run device-side compilation.
			
 
				-// RUN:   | FileCheck -check-prefix CUDA-ND \
			
 
				-// Then compile host side and make sure we don't attempt to incorporate GPU code.
			
 
				-// RUN:    -check-prefix CUDA-H -check-prefix CUDA-H-NI \
			
 
				-// Make sure we don't link anything.
			
 
				-// RUN:    -check-prefix CUDA-NL %s
			
 
				-
			
 
				-// Verify that -cuda-no-host disables host-side compilation and linking
			
 
				-// RUN: %clang -### --cuda-device-only %s 2>&1 \
			
 
				-// Compile device-side to PTX assembly
			
 
				-// RUN:   | FileCheck -check-prefix CUDA-D1 \
			
 
				-// Make sure there are no host cmpilation or linking.
			
 
				-// RUN:   -check-prefix CUDA-NH -check-prefix CUDA-NL %s
			
 
				-
			
 
				-// Verify that with -S we compile host and device sides to assembly
			
 
				-// and incorporate device code on the host side.
			
 
				-// RUN: %clang -### -S -c %s 2>&1 \
			
 
				-// Compile device-side to PTX assembly
			
 
				-// RUN:   | FileCheck -check-prefix CUDA-D1 \
			
 
				-// Then compile host side and incorporate GPU code.
			
 
				-// RUN:  -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
			
 
				-// Make sure we don't link anything.
			
 
				-// RUN:  -check-prefix CUDA-NL %s
			
 
				-
			
 
				-// Verify that --cuda-gpu-arch option passes correct GPU
			
 
				-// archtecture info to device compilation.
			
 
				-// RUN: %clang -### --cuda-gpu-arch=sm_35 -c %s 2>&1 \
			
 
				-// Compile device-side to PTX assembly.
			
 
				-// RUN:   | FileCheck -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \
			
 
				-// Then compile host side and incorporate GPU code.
			
 
				-// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
			
 
				-// Make sure we don't link anything.
			
 
				-// RUN:   -check-prefix CUDA-NL %s
			
 
				-
			
 
				-// Verify that there is device-side compilation per --cuda-gpu-arch args
			
 
				-// and that all results are included on the host side.
			
 
				-// RUN: %clang -### --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
			
 
				-// Compile both device-sides to PTX assembly
			
 
				-// RUN:   | FileCheck \
			
 
				-// RUN: -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \
			
 
				-// RUN: -check-prefix CUDA-D2 -check-prefix CUDA-D2-SM30 \
			
 
				-// Then compile host side and incorporate both device-side outputs
			
 
				-// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 -check-prefix CUDA-H-I2 \
			
 
				-// Make sure we don't link anything.
			
 
				-// RUN:   -check-prefix CUDA-NL %s
			
 
				-
			
 
				-// Match device-side compilation
			
 
				-// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
			
 
				-// CUDA-D1-SAME: "-fcuda-is-device"
			
 
				-// CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
			
 
				-// CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
			
 
				-// CUDA-D1-SAME: "-x" "cuda"
			
 
				-
			
 
				-// Match anothe device-side compilation
			
 
				-// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
			
 
				-// CUDA-D2-SAME: "-fcuda-is-device"
			
 
				-// CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
			
 
				-// CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
			
 
				-// CUDA-D2-SAME: "-x" "cuda"
			
 
				-
			
 
				-// Match no device-side compilation
			
 
				-// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda"
			
 
				-// CUDA-ND-SAME-NOT: "-fcuda-is-device"
			
 
				-
			
 
				-// Match host-side compilation
			
 
				-// CUDA-H: "-cc1" "-triple"
			
 
				-// CUDA-H-SAME-NOT: "nvptx{{64?}}-nvidia-cuda"
			
 
				-// CUDA-H-SAME-NOT: "-fcuda-is-device"
			
 
				-// CUDA-H-SAME: "-o" "[[HOSTOBJ:[^"]*]]"
			
 
				-// CUDA-H-SAME: "-x" "cuda"
			
 
				-// CUDA-H-I1-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY1]]"
			
 
				-// CUDA-H-I2-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY2]]"
			
 
				-
			
 
				-// Match no GPU code inclusion.
			
 
				-// CUDA-H-NI-NOT: "-fcuda-include-gpubinary"
			
 
				-
			
 
				-// Match no CUDA compilation
			
 
				-// CUDA-NH-NOT: "-cc1" "-triple"
			
 
				-// CUDA-NH-SAME-NOT: "-x" "cuda"
			
 
				-
			
 
				-// Match linker
			
 
				-// CUDA-L: "{{.*}}ld{{(.exe)?}}"
			
 
				-// CUDA-L-SAME: "[[HOSTOBJ]]"
			
 
				-
			
 
				-// Match no linker
			
 
				-// CUDA-NL-NOT: "{{.*}}ld{{(.exe)?}}"
			
--- a/test/Index/attributes-cuda.cu
+++ b/test/Index/attributes-cuda.cu
@@ -1,6 +1,4 @@
 
				 // RUN: c-index-test -test-load-source all -x cuda %s | FileCheck %s
			
 
				-// RUN: c-index-test -test-load-source all -x cuda --cuda-host-only %s | FileCheck %s
			
 
				-// RUN: c-index-test -test-load-source all -x cuda --cuda-device-only %s | FileCheck %s
			
 
				 
			
 
				 __attribute__((device)) void f_device();
			
 
				 __attribute__((global)) void f_global();
			
@@ -8,13 +6,13 @@ __attribute__((constant)) int* g_constant;
 
				 __attribute__((shared)) float *g_shared;
			
 
				 __attribute__((host)) void f_host();
			
 
				 
			
 
				-// CHECK:       attributes-cuda.cu:5:30: FunctionDecl=f_device:5:30
			
 
				-// CHECK-NEXT:  attributes-cuda.cu:5:16: attribute(device)
			
 
				-// CHECK:       attributes-cuda.cu:6:30: FunctionDecl=f_global:6:30
			
 
				-// CHECK-NEXT:  attributes-cuda.cu:6:16: attribute(global)
			
 
				-// CHECK:       attributes-cuda.cu:7:32: VarDecl=g_constant:7:32 (Definition)
			
 
				-// CHECK-NEXT:  attributes-cuda.cu:7:16: attribute(constant)
			
 
				-// CHECK:       attributes-cuda.cu:8:32: VarDecl=g_shared:8:32 (Definition)
			
 
				-// CHECK-NEXT:  attributes-cuda.cu:8:16: attribute(shared)
			
 
				-// CHECK:       attributes-cuda.cu:9:28: FunctionDecl=f_host:9:28
			
 
				-// CHECK-NEXT:  attributes-cuda.cu:9:16: attribute(host)
			
 
				+// CHECK:       attributes-cuda.cu:3:30: FunctionDecl=f_device:3:30
			
 
				+// CHECK-NEXT:  attributes-cuda.cu:3:16: attribute(device)
			
 
				+// CHECK:       attributes-cuda.cu:4:30: FunctionDecl=f_global:4:30
			
 
				+// CHECK-NEXT:  attributes-cuda.cu:4:16: attribute(global)
			
 
				+// CHECK:       attributes-cuda.cu:5:32: VarDecl=g_constant:5:32 (Definition)
			
 
				+// CHECK-NEXT:  attributes-cuda.cu:5:16: attribute(constant)
			
 
				+// CHECK:       attributes-cuda.cu:6:32: VarDecl=g_shared:6:32 (Definition)
			
 
				+// CHECK-NEXT:  attributes-cuda.cu:6:16: attribute(shared)
			
 
				+// CHECK:       attributes-cuda.cu:7:28: FunctionDecl=f_host:7:28
			
 
				+// CHECK-NEXT:  attributes-cuda.cu:7:16: attribute(host)
			
--- a/test/Index/index-file.cu
+++ b/test/Index/index-file.cu
@@ -1,9 +0,0 @@
 
				-// Make sure we can process CUDA file even if driver creates multiple jobs
			
 
				-// RUN: c-index-test -test-load-source all %s | FileCheck %s -check-prefix=CHECK-ANY
			
 
				-// Make sure we process correct side of cuda compilation
			
 
				-// RUN: c-index-test -test-load-source all --cuda-host-only %s | FileCheck %s -check-prefix=CHECK-HOST
			
 
				-// RUN: c-index-test -test-load-source all --cuda-device-only %s | FileCheck %s -check-prefix=CHECK-DEVICE
			
 
				-
			
 
				-// CHECK-ANY: macro definition=__cplusplus
			
 
				-// CHECK-HOST-NOT: macro definition=__CUDA_ARCH__
			
 
				-// CHECK-DEVICE: macro definition=__CUDA_ARCH__
			
--- a/tools/libclang/CIndex.cpp
+++ b/tools/libclang/CIndex.cpp
@@ -3102,12 +3102,6 @@ static void clang_parseTranslationUnit_Impl(void *UserData) {
 
				       /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies,
			
 
				       /*UserFilesAreVolatile=*/true, ForSerialization, &ErrUnit));
			
 
				 
			
 
				-  // Early failures in LoadFromCommandLine may return with ErrUnit unset.
			
 
				-  if (!Unit && !ErrUnit) {
			
 
				-    PTUI->result = CXError_ASTReadError;
			
 
				-    return;
			
 
				-  }
			
 
				-
			
 
				   if (NumErrors != Diags->getClient()->getNumErrors()) {
			
 
				     // Make sure to check that 'Unit' is non-NULL.
			
 
				     if (CXXIdx->getDisplayDiagnostics())
			
--- a/unittests/ASTMatchers/ASTMatchersTest.h
+++ b/unittests/ASTMatchers/ASTMatchersTest.h
@@ -164,7 +164,6 @@ testing::AssertionResult matchesConditionallyWithCuda(
 
				   std::vector<std::string> Args;
			
 
				   Args.push_back("-xcuda");
			
 
				   Args.push_back("-fno-ms-extensions");
			
 
				-  Args.push_back("--cuda-host-only");
			
 
				   Args.push_back(CompileArg);
			
 
				   if (!runToolOnCodeWithArgs(Factory->create(),
			
 
				                              CudaHeader + Code, Args)) {