6 år sedan · 1c29f9f7f5
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -221,6 +221,9 @@ ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
 
															 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
														
 
															 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
														
 
															+void initializeGCNNSAReassignPass(PassRegistry &);
														
 
															+extern char &GCNNSAReassignID;
														
 
															+
														
 
															 Target &getTheAMDGPUTarget();
														
 
															 Target &getTheGCNTarget();
														
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -149,6 +149,12 @@ static cl::opt<bool> EnableLowerKernelArguments(
 
															   cl::init(true),
														
 
															   cl::Hidden);
														
 
															+static cl::opt<bool> EnableRegReassign(
														
 
															+  "amdgpu-reassign-regs",
														
 
															+  cl::desc("Enable register reassign optimizations on gfx10+"),
														
 
															+  cl::init(true),
														
 
															+  cl::Hidden);
														
 
															+
														
 
															 // Enable atomic optimization
														
 
															 static cl::opt<bool> EnableAtomicOptimizations(
														
 
															   "amdgpu-atomic-optimizations",
														
@@ -228,6 +234,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
 
															   initializeAMDGPUUseNativeCallsPass(*PR);
														
 
															   initializeAMDGPUSimplifyLibCallsPass(*PR);
														
 
															   initializeAMDGPUInlinerPass(*PR);
														
 
															+  initializeGCNNSAReassignPass(*PR);
														
 
															 }
														
 
															 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
														
@@ -605,6 +612,7 @@ public:
 
															   void addFastRegAlloc() override;
														
 
															   void addOptimizedRegAlloc() override;
														
 
															   void addPreRegAlloc() override;
														
 
															+  bool addPreRewrite() override;
														
 
															   void addPostRegAlloc() override;
														
 
															   void addPreSched2() override;
														
 
															   void addPreEmitPass() override;
														
@@ -926,6 +934,13 @@ void GCNPassConfig::addOptimizedRegAlloc() {
 
															   TargetPassConfig::addOptimizedRegAlloc();
														
 
															 }
														
 
															+bool GCNPassConfig::addPreRewrite() {
														
 
															+  if (EnableRegReassign) {
														
 
															+    addPass(&GCNNSAReassignID);
														
 
															+  }
														
 
															+  return true;
														
 
															+}
														
 
															+
														
 
															 void GCNPassConfig::addPostRegAlloc() {
														
 
															   addPass(&SIFixVGPRCopiesID);
														
 
															   if (getOptLevel() > CodeGenOpt::None)
														
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -116,6 +116,7 @@ add_llvm_target(AMDGPUCodeGen
 
															   SIShrinkInstructions.cpp
														
 
															   SIWholeQuadMode.cpp
														
 
															   GCNILPSched.cpp
														
 
															+  GCNNSAReassign.cpp
														
 
															   GCNDPPCombine.cpp
														
 
															   SIModeRegister.cpp
														
 
															   )
														
--- a/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -0,0 +1,343 @@
 
															+//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
														
 
															+//
														
 
															+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
														
 
															+// See https://llvm.org/LICENSE.txt for license information.
														
 
															+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
														
 
															+//
														
 
															+//===----------------------------------------------------------------------===//
														
 
															+//
														
 
															+/// \file
														
 
															+/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
														
 
															+/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
														
 
															+/// with sequential versions where possible.
														
 
															+///
														
 
															+//===----------------------------------------------------------------------===//
														
 
															+
														
 
															+#include "AMDGPU.h"
														
 
															+#include "AMDGPUSubtarget.h"
														
 
															+#include "SIInstrInfo.h"
														
 
															+#include "SIMachineFunctionInfo.h"
														
 
															+#include "llvm/ADT/Statistic.h"
														
 
															+#include "llvm/CodeGen/LiveInterval.h"
														
 
															+#include "llvm/CodeGen/LiveIntervals.h"
														
 
															+#include "llvm/CodeGen/LiveRegMatrix.h"
														
 
															+#include "llvm/CodeGen/MachineFunctionPass.h"
														
 
															+#include "llvm/CodeGen/VirtRegMap.h"
														
 
															+#include "llvm/Support/MathExtras.h"
														
 
															+#include <algorithm>
														
 
															+
														
 
															+using namespace llvm;
														
 
															+
														
 
															+#define DEBUG_TYPE "amdgpu-nsa-reassign"
														
 
															+
														
 
															+STATISTIC(NumNSAInstructions,
														
 
															+          "Number of NSA instructions with non-sequential address found");
														
 
															+STATISTIC(NumNSAConverted,
														
 
															+          "Number of NSA instructions changed to sequential");
														
 
															+
														
 
															+namespace {
														
 
															+
														
 
															+class GCNNSAReassign : public MachineFunctionPass {
														
 
															+public:
														
 
															+  static char ID;
														
 
															+
														
 
															+  GCNNSAReassign() : MachineFunctionPass(ID) {
														
 
															+    initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
														
 
															+  }
														
 
															+
														
 
															+  bool runOnMachineFunction(MachineFunction &MF) override;
														
 
															+
														
 
															+  StringRef getPassName() const override { return "GCN NSA Reassign"; }
														
 
															+
														
 
															+  void getAnalysisUsage(AnalysisUsage &AU) const override {
														
 
															+    AU.addRequired<LiveIntervals>();
														
 
															+    AU.addRequired<VirtRegMap>();
														
 
															+    AU.addRequired<LiveRegMatrix>();
														
 
															+    AU.setPreservesAll();
														
 
															+    MachineFunctionPass::getAnalysisUsage(AU);
														
 
															+  }
														
 
															+
														
 
															+private:
														
 
															+  typedef enum {
														
 
															+    NOT_NSA,        // Not an NSA instruction
														
 
															+    FIXED,          // NSA which we cannot modify
														
 
															+    NON_CONTIGUOUS, // NSA with non-sequential address which we can try
														
 
															+                    // to optimize.
														
 
															+    CONTIGUOUS      // NSA with all sequential address registers
														
 
															+  } NSA_Status;
														
 
															+
														
 
															+  const GCNSubtarget *ST;
														
 
															+
														
 
															+  const MachineRegisterInfo *MRI;
														
 
															+
														
 
															+  const SIRegisterInfo *TRI;
														
 
															+
														
 
															+  VirtRegMap *VRM;
														
 
															+
														
 
															+  LiveRegMatrix *LRM;
														
 
															+
														
 
															+  LiveIntervals *LIS;
														
 
															+
														
 
															+  unsigned MaxNumVGPRs;
														
 
															+
														
 
															+  const MCPhysReg *CSRegs;
														
 
															+
														
 
															+  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
														
 
															+
														
 
															+  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
														
 
															+                          unsigned StartReg) const;
														
 
															+
														
 
															+  bool canAssign(unsigned StartReg, unsigned NumRegs) const;
														
 
															+
														
 
															+  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
														
 
															+};
														
 
															+
														
 
															+} // End anonymous namespace.
														
 
															+
														
 
															+INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
														
 
															+                      false, false)
														
 
															+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
														
 
															+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
														
 
															+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
														
 
															+INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
														
 
															+                    false, false)
														
 
															+
														
 
															+
														
 
															+char GCNNSAReassign::ID = 0;
														
 
															+
														
 
															+char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;
														
 
															+
														
 
															+bool
														
 
															+GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
														
 
															+                                   unsigned StartReg) const {
														
 
															+  unsigned NumRegs = Intervals.size();
														
 
															+
														
 
															+  for (unsigned N = 0; N < NumRegs; ++N)
														
 
															+    if (VRM->hasPhys(Intervals[N]->reg))
														
 
															+      LRM->unassign(*Intervals[N]);
														
 
															+
														
 
															+  for (unsigned N = 0; N < NumRegs; ++N)
														
 
															+    if (LRM->checkInterference(*Intervals[N], StartReg + N))
														
 
															+      return false;
														
 
															+
														
 
															+  for (unsigned N = 0; N < NumRegs; ++N)
														
 
															+    LRM->assign(*Intervals[N], StartReg + N);
														
 
															+
														
 
															+  return true;
														
 
															+}
														
 
															+
														
 
															+bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
														
 
															+  for (unsigned N = 0; N < NumRegs; ++N) {
														
 
															+    unsigned Reg = StartReg + N;
														
 
															+    if (!MRI->isAllocatable(Reg))
														
 
															+      return false;
														
 
															+
														
 
															+    for (unsigned I = 0; CSRegs[I]; ++I)
														
 
															+      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
														
 
															+          !LRM->isPhysRegUsed(CSRegs[I]))
														
 
															+      return false;
														
 
															+  }
														
 
															+
														
 
															+  return true;
														
 
															+}
														
 
															+
														
 
															+bool
														
 
															+GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
														
 
															+  unsigned NumRegs = Intervals.size();
														
 
															+
														
 
															+  if (NumRegs > MaxNumVGPRs)
														
 
															+    return false;
														
 
															+  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
														
 
															+
														
 
															+  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
														
 
															+    if (!canAssign(Reg, NumRegs))
														
 
															+      continue;
														
 
															+
														
 
															+    if (tryAssignRegisters(Intervals, Reg))
														
 
															+      return true;
														
 
															+  }
														
 
															+
														
 
															+  return false;
														
 
															+}
														
 
															+
														
 
															+GCNNSAReassign::NSA_Status
														
 
															+GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
														
 
															+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
														
 
															+  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
														
 
															+    return NSA_Status::NOT_NSA;
														
 
															+
														
 
															+  int VAddr0Idx =
														
 
															+    AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
														
 
															+
														
 
															+  unsigned VgprBase = 0;
														
 
															+  bool NSA = false;
														
 
															+  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
														
 
															+    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
														
 
															+    unsigned Reg = Op.getReg();
														
 
															+    if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
														
 
															+      return NSA_Status::FIXED;
														
 
															+
														
 
															+    unsigned PhysReg = VRM->getPhys(Reg);
														
 
															+
														
 
															+    if (!Fast) {
														
 
															+      if (!PhysReg)
														
 
															+        return NSA_Status::FIXED;
														
 
															+
														
 
															+      // Bail if address is not a VGPR32. It should be possible to extend the
														
 
															+      // optimization to work with subregs of wider register tuples, but the
														
 
															+      // logic to find free registers will be much more complicated with much
														
 
															+      // fewer chances of success. It seems reasonable to assume that in most
														
 
															+      // cases a tuple is used because a vector variable contains different
														
 
															+      // parts of an address and it is either already consecutive or cannot
														
 
															+      // be reassigned if not. If needed it is better to rely on register
														
 
															+      // coalescer to process such address tuples.
														
 
															+      if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
														
 
															+        return NSA_Status::FIXED;
														
 
															+
														
 
															+      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
														
 
															+
														
 
															+      if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
														
 
															+        return NSA_Status::FIXED;
														
 
															+
														
 
															+      for (auto U : MRI->use_nodbg_operands(Reg)) {
														
 
															+        if (U.isImplicit())
														
 
															+          return NSA_Status::FIXED;
														
 
															+        const MachineInstr *UseInst = U.getParent();
														
 
															+        if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
														
 
															+          return NSA_Status::FIXED;
														
 
															+      }
														
 
															+
														
 
															+      if (!LIS->hasInterval(Reg))
														
 
															+        return NSA_Status::FIXED;
														
 
															+    }
														
 
															+
														
 
															+    if (I == 0)
														
 
															+      VgprBase = PhysReg;
														
 
															+    else if (VgprBase + I != PhysReg)
														
 
															+      NSA = true;
														
 
															+  }
														
 
															+
														
 
															+  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
														
 
															+}
														
 
															+
														
 
															+bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
														
 
															+  ST = &MF.getSubtarget<GCNSubtarget>();
														
 
															+  if (ST->getGeneration() < GCNSubtarget::GFX10)
														
 
															+    return false;
														
 
															+
														
 
															+  MRI = &MF.getRegInfo();
														
 
															+  TRI = ST->getRegisterInfo();
														
 
															+  VRM = &getAnalysis<VirtRegMap>();
														
 
															+  LRM = &getAnalysis<LiveRegMatrix>();
														
 
															+  LIS = &getAnalysis<LiveIntervals>();
														
 
															+
														
 
															+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
														
 
															+  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
														
 
															+  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
														
 
															+  CSRegs = TRI->getCalleeSavedRegs(&MF);
														
 
															+
														
 
															+  using Candidate = std::pair<const MachineInstr*, bool>;
														
 
															+  SmallVector<Candidate, 32> Candidates;
														
 
															+  for (const MachineBasicBlock &MBB : MF) {
														
 
															+    for (const MachineInstr &MI : MBB) {
														
 
															+      switch (CheckNSA(MI)) {
														
 
															+      default:
														
 
															+        continue;
														
 
															+      case NSA_Status::CONTIGUOUS:
														
 
															+        Candidates.push_back(std::make_pair(&MI, true));
														
 
															+        break;
														
 
															+      case NSA_Status::NON_CONTIGUOUS:
														
 
															+        Candidates.push_back(std::make_pair(&MI, false));
														
 
															+        ++NumNSAInstructions;
														
 
															+        break;
														
 
															+      }
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  bool Changed = false;
														
 
															+  for (auto &C : Candidates) {
														
 
															+    if (C.second)
														
 
															+      continue;
														
 
															+
														
 
															+    const MachineInstr *MI = C.first;
														
 
															+    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
														
 
															+      // Already made contiguous by an earlier reassignment.
														
 
															+      C.second = true;
														
 
															+      ++NumNSAConverted;
														
 
															+      continue;
														
 
															+    }
														
 
															+
														
 
															+    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
														
 
															+    int VAddr0Idx =
														
 
															+      AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
														
 
															+
														
 
															+    SmallVector<LiveInterval *, 16> Intervals;
														
 
															+    SmallVector<unsigned, 16> OrigRegs;
														
 
															+    SlotIndex MinInd, MaxInd;
														
 
															+    for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
														
 
															+      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
														
 
															+      unsigned Reg = Op.getReg();
														
 
															+      LiveInterval *LI = &LIS->getInterval(Reg);
														
 
															+      if (llvm::find(Intervals, LI) != Intervals.end()) {
														
 
															+        // Same register used, unable to make sequential
														
 
															+        Intervals.clear();
														
 
															+        break;
														
 
															+      }
														
 
															+      Intervals.push_back(LI);
														
 
															+      OrigRegs.push_back(VRM->getPhys(Reg));
														
 
															+      MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
														
 
															+      MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
														
 
															+    }
														
 
															+
														
 
															+    if (Intervals.empty())
														
 
															+      continue;
														
 
															+
														
 
															+    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
														
 
															+                      << "\tOriginal allocation:\t";
														
 
															+               for(auto *LI : Intervals)
														
 
															+                 dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
														
 
															+               dbgs() << '\n');
														
 
															+
														
 
															+    bool Success = scavengeRegs(Intervals);
														
 
															+    if (!Success) {
														
 
															+      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
														
 
															+      if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
														
 
															+        continue;
														
 
															+    } else {
														
 
															+      // Check we did not make it worse for other instructions.
														
 
															+      auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
														
 
															+                                [this](const Candidate &C, SlotIndex I) {
														
 
															+                                  return LIS->getInstructionIndex(*C.first) < I;
														
 
															+                                });
														
 
															+      for (auto E = Candidates.end(); Success && I != E &&
														
 
															+              LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
														
 
															+        if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
														
 
															+          Success = false;
														
 
															+          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+
														
 
															+    if (!Success) {
														
 
															+      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
														
 
															+        if (VRM->hasPhys(Intervals[I]->reg))
														
 
															+          LRM->unassign(*Intervals[I]);
														
 
															+
														
 
															+      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
														
 
															+        LRM->assign(*Intervals[I], OrigRegs[I]);
														
 
															+
														
 
															+      continue;
														
 
															+    }
														
 
															+
														
 
															+    C.second = true;
														
 
															+    ++NumNSAConverted;
														
 
															+    LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
														
 
															+                 << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
														
 
															+                 << " : "
														
 
															+                 << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
														
 
															+                 << "]\n");
														
 
															+    Changed = true;
														
 
															+  }
														
 
															+
														
 
															+  return Changed;
														
 
															+}
														
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
@@ -1,10 +1,12 @@
 
															-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SIVI,PRT %s
														
 
															-; RUN: llc -march=amdgcn -mcpu=fiji  -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SIVI,PRT %s
														
 
															-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900,PRT %s
														
 
															+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,SI %s
														
 
															+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,GFX8910,SIVI,PRT %s
														
 
															+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,PRT %s
														
 
															 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900,NOPRT %s
														
 
															+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
														
 
															 ; GCN-LABEL: {{^}}load_1d:
														
 
															-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -22,7 +24,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm tfe{{$}}
														
 
															+; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm tfe{{$}}
														
 
															+; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
														
@@ -45,7 +48,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
														
 
															+; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
														
 
															+; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
														
@@ -58,7 +62,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_2d:
														
 
															-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -76,7 +81,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
														
 
															+; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
														
 
															+; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
														
@@ -89,7 +95,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_3d:
														
 
															-; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -107,7 +114,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
														
 
															+; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
														
 
															+; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
														
@@ -120,7 +128,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_cube:
														
 
															-; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -138,7 +147,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
														
 
															+; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
														
 
															+; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
														
@@ -151,7 +161,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_1darray:
														
 
															-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -169,7 +180,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
														
 
															+; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
														
 
															+; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
														
@@ -182,7 +194,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_2darray:
														
 
															-; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -200,7 +213,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
														
 
															+; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
														
 
															+; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
														
@@ -213,7 +227,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_2dmsaa:
														
 
															-; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -231,7 +246,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
														
 
															+; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
														
 
															+; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
														
@@ -244,7 +260,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_2darraymsaa:
														
 
															-; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -262,7 +279,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
														
 
															+; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
														
 
															+; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
														
@@ -275,7 +293,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_mip_1d:
														
 
															-; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -293,7 +312,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe{{$}}
														
 
															+; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe{{$}}
														
 
															+; GFX10: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) {
														
@@ -306,7 +326,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_mip_2d:
														
 
															-; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -324,7 +345,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v3
														
 
															-; GCN: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
														
 
															+; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
														
 
															+; GFX10: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v4, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
														
 
															 define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) {
														
@@ -432,7 +454,8 @@ main_body:
 
															 ; NOPRT-NOT: v_mov_b32_e32 v0
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v2
														
 
															-; GCN: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 unorm tfe{{$}}
														
 
															+; GFX6789: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 unorm tfe{{$}}
														
 
															+; GFX10: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v3, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v3
														
 
															 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
														
@@ -451,7 +474,8 @@ main_body:
 
															 ; NOPRT: v_mov_b32_e32 v2, 0
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v0
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v1
														
 
															-; GCN: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 unorm tfe{{$}}
														
 
															+; GFX6789: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 unorm tfe{{$}}
														
 
															+; GFX10: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v2, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v2
														
 
															 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
														
@@ -468,7 +492,8 @@ main_body:
 
															 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
														
 
															 ; NOPRT: v_mov_b32_e32 v1, 0
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v0
														
 
															-; GCN: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
														
 
															+; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
														
 
															+; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v1, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
														
 
															 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
														
@@ -485,7 +510,8 @@ main_body:
 
															 ; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
														
 
															 ; NOPRT: v_mov_b32_e32 v1, 0
														
 
															 ; NOPRT-NOT: v_mov_b32_e32 v0
														
 
															-; GCN: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
														
 
															+; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
														
 
															+; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
														
 
															 ; SIVI: buffer_store_dword v1, off, s[8:11], 0
														
 
															 ; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
														
 
															 define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
														
@@ -499,7 +525,8 @@ main_body:
 
															 ; GCN-LABEL: {{^}}load_mip_3d:
														
 
															-; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -507,7 +534,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_mip_cube:
														
 
															-; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -515,7 +543,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_mip_1darray:
														
 
															-; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -523,7 +552,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_mip_2darray:
														
 
															-; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -531,7 +561,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_1d:
														
 
															-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -539,7 +570,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_2d:
														
 
															-; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
														
 
															 define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -547,7 +579,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_3d:
														
 
															-; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
														
 
															 define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -555,7 +588,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_cube:
														
 
															-; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
														
 
															 define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -563,7 +597,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_1darray:
														
 
															-; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
														
 
															 define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -571,7 +606,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_2darray:
														
 
															-; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
														
 
															 define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -579,7 +615,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_2dmsaa:
														
 
															-; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
														
 
															 define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -587,7 +624,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_2darraymsaa:
														
 
															-; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_store v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
														
 
															 define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -595,7 +633,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_mip_1d:
														
 
															-; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -603,7 +642,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_mip_2d:
														
 
															-; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
														
 
															 define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -611,7 +651,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_mip_3d:
														
 
															-; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
														
 
															 define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -619,7 +660,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_mip_cube:
														
 
															-; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
														
 
															 define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -627,7 +669,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_mip_1darray:
														
 
															-; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
														
 
															 define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -635,7 +678,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_mip_2darray:
														
 
															-; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
														
 
															 define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -643,7 +687,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_1d:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -651,7 +696,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_2d:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -659,7 +705,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_3d:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -667,7 +714,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_cube:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -675,7 +723,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_1darray:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -683,7 +732,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_2darray:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -691,7 +741,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_2dmsaa:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -699,7 +750,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
														
 
															-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
														
 
															+; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
														
 
															 define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -707,7 +759,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_1d_V1:
														
 
															-; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
														
 
															+; GFX6789: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
														
 
															+; GFX10: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
														
 
															 main_body:
														
 
															   %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -715,7 +768,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_1d_V2:
														
 
															-; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
														
 
															+; GFX6789: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
														
 
															+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
														
 
															 main_body:
														
 
															   %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -723,7 +777,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_1d_V1:
														
 
															-; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
														
 
															+; GFX6789: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
														
 
															+; GFX10: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -731,7 +786,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_1d_V2:
														
 
															-; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
														
 
															+; GFX6789: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
														
 
															+; GFX10: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm ;
														
 
															 define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
														
@@ -739,7 +795,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_1d_glc:
														
 
															-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
														
 
															+; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
														
 
															+; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
														
 
															 define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
														
@@ -747,7 +804,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_1d_slc:
														
 
															-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
														
 
															+; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
														
 
															+; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
														
 
															 define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
														
@@ -755,7 +813,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}load_1d_glc_slc:
														
 
															-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
														
 
															+; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
														
 
															+; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
														
 
															 define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
														
 
															 main_body:
														
 
															   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
														
@@ -763,7 +822,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_1d_glc:
														
 
															-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
														
 
															+; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
														
 
															+; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
														
 
															 define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
														
@@ -771,7 +831,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_1d_slc:
														
 
															-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
														
 
															+; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
														
 
															+; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
														
 
															 define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
														
@@ -779,7 +840,8 @@ main_body:
 
															 }
														
 
															 ; GCN-LABEL: {{^}}store_1d_glc_slc:
														
 
															-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
														
 
															+; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
														
 
															+; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
														
 
															 define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
														
@@ -798,11 +860,11 @@ main_body:
 
															 ; Ideally, the register allocator would avoid the wait here
														
 
															 ;
														
 
															 ; GCN-LABEL: {{^}}image_store_wait:
														
 
															-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
														
 
															+; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf
														
 
															 ; SI: s_waitcnt expcnt(0)
														
 
															-; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
														
 
															+; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf
														
 
															 ; GCN: s_waitcnt vmcnt(0)
														
 
															-; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
														
 
															+; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf
														
 
															 define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
														
 
															 main_body:
														
 
															   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
														
@@ -812,10 +874,10 @@ main_body:
 
															 }
														
 
															 ; SI won't merge ds memory operations, because of the signed offset bug, so
														
 
															-; we only have check lines for VI.
														
 
															-; VI-LABEL: image_load_mmo
														
 
															-; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
														
 
															-; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
														
 
															+; we only have check lines for VI+.
														
 
															+; GFX8910-LABEL: image_load_mmo
														
 
															+; GFX8910: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
														
 
															+; GFX8910: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
														
 
															 define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
														
 
															   store float 0.000000e+00, float addrspace(3)* %lds
														
 
															   %c0 = extractelement <2 x i32> %c, i32 0
														
--- a/test/CodeGen/AMDGPU/nsa-reassign.ll
+++ b/test/CodeGen/AMDGPU/nsa-reassign.ll
@@ -0,0 +1,102 @@
 
															+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
														
 
															+
														
 
															+; GCN-LABEL: {{^}}sample_contig_nsa:
														
 
															+; GCN-DAG: image_sample_c_l v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
														
 
															+; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
														
 
															+define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
														
 
															+main_body:
														
 
															+  %zcompare.1 = fadd float %zcompare, 1.0
														
 
															+  %s1.1 = fadd float %s1, 1.0
														
 
															+  %t1.1 = fadd float %t1, 1.0
														
 
															+  %r1.1 = fadd float %r1, 1.0
														
 
															+  %s2.1 = fadd float %s2, 1.0
														
 
															+  %t2.1 = fadd float %t2, 1.0
														
 
															+  %r2.1 = fadd float %r2, 1.0
														
 
															+  %lod.1 = fadd float %lod, 1.0
														
 
															+  %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare.1, float %s1.1, float %t1.1, float %r1.1, float %lod.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
														
 
															+  %r = insertelement <2 x float> %r.0, float %v2, i32 1
														
 
															+  ret <2 x float> %r
														
 
															+}
														
 
															+
														
 
															+; GCN-LABEL: {{^}}sample_contig_nsa_10vgprs:
														
 
															+; GCN-DAG: image_sample_c_l v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}],
														
 
															+; GCN-DAG: image_sample v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+}}],
														
 
															+define amdgpu_ps <2 x float> @sample_contig_nsa_10vgprs(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) #0 {
														
 
															+main_body:
														
 
															+  %zcompare.1 = fadd float %zcompare, 1.0
														
 
															+  %s1.1 = fadd float %s1, 1.0
														
 
															+  %t1.1 = fadd float %t1, 1.0
														
 
															+  %r1.1 = fadd float %r1, 1.0
														
 
															+  %s2.1 = fadd float %s2, 1.0
														
 
															+  %t2.1 = fadd float %t2, 1.0
														
 
															+  %r2.1 = fadd float %r2, 1.0
														
 
															+  %lod.1 = fadd float %lod, 1.0
														
 
															+  %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare.1, float %s1.1, float %t1.1, float %r1.1, float %lod.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
														
 
															+  %r = insertelement <2 x float> %r.0, float %v2, i32 1
														
 
															+  ret <2 x float> %r
														
 
															+}
														
 
															+
														
 
															+; GCN-LABEL: {{^}}sample_contig_nsa_conflict:
														
 
															+; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
														
 
															+; GCN-DAG: image_sample v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+}}],
														
 
															+define amdgpu_ps <2 x float> @sample_contig_nsa_conflict(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
														
 
															+main_body:
														
 
															+  %zcompare.1 = fadd float %zcompare, 1.0
														
 
															+  %s1.1 = fadd float %s1, 1.0
														
 
															+  %t1.1 = fadd float %t1, 1.0
														
 
															+  %r1.1 = fadd float %r1, 1.0
														
 
															+  %s2.1 = fadd float %s2, 1.0
														
 
															+  %t2.1 = fadd float %t2, 1.0
														
 
															+  %r2.1 = fadd float %r2, 1.0
														
 
															+  %lod.1 = fadd float %lod, 1.0
														
 
															+  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  %v1 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %t2.1, float %s2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
														
 
															+  %r = insertelement <2 x float> %r.0, float %v2, i32 1
														
 
															+  ret <2 x float> %r
														
 
															+}
														
 
															+
														
 
															+; GCN-LABEL: {{^}}sample_contig_nsa_same_addr:
														
 
															+; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
														
 
															+; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
														
 
															+define amdgpu_ps <2 x float> @sample_contig_nsa_same_addr(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
														
 
															+main_body:
														
 
															+  %zcompare.1 = fadd float %zcompare, 1.0
														
 
															+  %s1.1 = fadd float %s1, 1.0
														
 
															+  %t1.1 = fadd float %t1, 1.0
														
 
															+  %r1.1 = fadd float %r1, 1.0
														
 
															+  %s2.1 = fadd float %s2, 1.0
														
 
															+  %t2.1 = fadd float %t2, 1.0
														
 
															+  %r2.1 = fadd float %r2, 1.0
														
 
															+  %lod.1 = fadd float %lod, 1.0
														
 
															+  %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
														
 
															+  %v1 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  %r.0 = insertelement <2 x float> undef, float %v1, i32 0
														
 
															+  %r = insertelement <2 x float> %r.0, float %v2, i32 1
														
 
															+  ret <2 x float> %r
														
 
															+}
														
 
															+
														
 
															+; GCN-LABEL: {{^}}sample_contig_nsa_same_reg:
														
 
															+; GCN-DAG: image_sample v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+}}],
														
 
															+define amdgpu_ps float @sample_contig_nsa_same_reg(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
														
 
															+main_body:
														
 
															+  %zcompare.1 = fadd float %zcompare, 1.0
														
 
															+  %s1.1 = fadd float %s1, 1.0
														
 
															+  %t1.1 = fadd float %t1, 1.0
														
 
															+  %r1.1 = fadd float %r1, 1.0
														
 
															+  %s2.1 = fadd float %s2, 1.0
														
 
															+  %t2.1 = fadd float %t2, 1.0
														
 
															+  %r2.1 = fadd float %r2, 1.0
														
 
															+  %lod.1 = fadd float %lod, 1.0
														
 
															+  %v = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %t2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
														
 
															+  ret float %v
														
 
															+}
														
 
															+
														
 
															+declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
														
 
															+declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
														
 
															+
														
 
															+attributes #0 = {"amdgpu-num-vgpr"="10"}