@@ -1,3 +1,4 @@
+//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,31 +12,55 @@
 //
 //===----------------------------------------------------------------------===//

-#include "NVPTXISelLowering.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXSection.h"
+#include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXUtilities.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 #include <sstream>
+#include <string>
+#include <utility>
+#include <vector>

 #undef DEBUG_TYPE
 #define DEBUG_TYPE "nvptx-lower"
@@ -109,7 +134,6 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
 NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                                          const NVPTXSubtarget &STI)
     : TargetLowering(TM), nvTM(&TM), STI(STI) {
-
   // always lower memset, memcpy, and memmove intrinsics to load/store
   // instructions, rather
   // then generating calls to memset, mempcy or memmove.
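
Note: the "always lower ... to load/store" comment refers to the expansion
thresholds that SelectionDAG consults before falling back to a libcall for
memset/memcpy/memmove. A minimal sketch of the idiom, assuming the values are
set in the constructor as in other TargetLowering subclasses (the exact lines
NVPTX uses sit just below this hunk and are not shown in the diff):

    // Effectively unbounded thresholds force inline expansion to load/store
    // sequences instead of calls to memset/memcpy/memmove.
    MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
    MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
    MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
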
@@ -981,7 +1005,7 @@ std::string NVPTXTargetLowering::getPrototype(
       unsigned align = 0;
       const CallInst *CallI = cast<CallInst>(CS->getInstruction());
       // +1 because index 0 is reserved for return type alignment
-      if (!llvm::getAlign(*CallI, i + 1, align))
+      if (!getAlign(*CallI, i + 1, align))
         align = DL.getABITypeAlignment(Ty);
       unsigned sz = DL.getTypeAllocSize(Ty);
       O << ".param .align " << align << " .b8 ";
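
Note: getPrototype() builds the textual PTX prototype used for indirect calls,
so the align computed above lands in output of roughly this shape (an
illustrative line only; the label and sizes here are made up, not from the
patch):

    prototype_0 : .callprototype (.param .align 4 .b8 _[12]) _ (.param .b32 _, .param .align 8 .b8 _[16]);
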
@@ -1047,7 +1071,7 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
     // With bitcast'd call targets, the instruction will be the call
     if (isa<CallInst>(CalleeI)) {
       // Check if we have call alignment metadata
-      if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
+      if (getAlign(*cast<CallInst>(CalleeI), Idx, Align))
         return Align;

      const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
@@ -1070,7 +1094,7 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
   // Check for function alignment information if we found that the
   // ultimate target is a Function
   if (DirectCallee)
-    if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
+    if (getAlign(*cast<Function>(DirectCallee), Idx, Align))
       return Align;

   // Call is indirect or alignment information is not available, fall back to
@@ -1747,7 +1771,6 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

   if (VTBits == 32 && STI.getSmVersion() >= 35) {
-
     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
     // {dHi, dLo} = {aHi, aLo} >> Amt
     //   dHi = aHi >> Amt
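
Note: the sm_35 path leans on PTX's funnel shift. A runnable scalar model of
shf.r.clamp, paraphrased from the PTX ISA description (the helper name is
hypothetical; the real lowering builds the target's funnel-shift DAG nodes
rather than executing C++):

    #include <cstdint>

    // Low 32 bits of ({hi, lo} >> amt), with amt clamped to 32 (.clamp mode).
    static uint32_t ShfRClamp(uint32_t lo, uint32_t hi, uint32_t amt) {
      if (amt > 32)
        amt = 32;                                 // saturate the shift amount
      uint64_t pair = ((uint64_t)hi << 32) | lo;  // concatenate {hi, lo}
      return (uint32_t)(pair >> amt);             // keep the low word
    }

With this, dLo of the 64-bit right shift is ShfRClamp(aLo, aHi, Amt) and dHi
is simply aHi >> Amt, matching the comments in the hunk.
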
@@ -1761,7 +1784,6 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
     return DAG.getMergeValues(Ops, dl);
   }
   else {
-
     // {dHi, dLo} = {aHi, aLo} >> Amt
     // - if (Amt>=size) then
     //      dLo = aHi >> (Amt-size)
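
Note: for pre-sm_35 targets the comments above describe the classic two-word
expansion. A runnable scalar model of the logical-shift flavor (illustrative
only; the names are hypothetical, and the actual code builds equivalent
SelectionDAG nodes with selects instead of branches):

    #include <cstdint>

    static void SrlParts64(uint32_t aHi, uint32_t aLo, uint32_t amt,
                           uint32_t &dHi, uint32_t &dLo) {
      const uint32_t size = 32;
      if (amt >= size) {        // whole low word comes from the high word
        dLo = aHi >> (amt - size);
        dHi = 0;                // an arithmetic shift would fill with the sign
      } else if (amt == 0) {    // avoid the undefined aHi << 32 below
        dLo = aLo;
        dHi = aHi;
      } else {                  // dLo = (aLo >> amt) | (aHi << (size - amt))
        dLo = (aLo >> amt) | (aHi << (size - amt));
        dHi = aHi >> amt;
      }
    }
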
@@ -1809,7 +1831,6 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue ShAmt = Op.getOperand(2);

   if (VTBits == 32 && STI.getSmVersion() >= 35) {
-
     // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
     // {dHi, dLo} = {aHi, aLo} << Amt
     //   dHi = shf.l.clamp aLo, aHi, Amt
@@ -1823,7 +1844,6 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
     return DAG.getMergeValues(Ops, dl);
   }
   else {
-
     // {dHi, dLo} = {aHi, aLo} << Amt
     // - if (Amt>=size) then
     //      dLo = aLo << Amt (all 0)
@@ -2002,11 +2022,10 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
     case 2:
       Opcode = NVPTXISD::StoreV2;
       break;
-    case 4: {
+    case 4:
       Opcode = NVPTXISD::StoreV4;
       break;
     }
-    }

     SmallVector<SDValue, 8> Ops;

@@ -2140,7 +2159,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
             theArgs[i],
             (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
                                      : nullptr))) {
-      assert(llvm::isKernelFunction(*F) &&
+      assert(isKernelFunction(*F) &&
              "Only kernels can have image/sampler params");
       InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
       continue;
@@ -2193,7 +2212,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
                          0);
       assert(vtparts.size() > 0 && "empty aggregate type not expected");
       bool aggregateIsPacked = false;
-      if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
+      if (StructType *STy = dyn_cast<StructType>(Ty))
         aggregateIsPacked = STy->isPacked();

       SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
@@ -2202,7 +2221,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
         EVT partVT = vtparts[parti];
         Value *srcValue = Constant::getNullValue(
             PointerType::get(partVT.getTypeForEVT(F->getContext()),
-                             llvm::ADDRESS_SPACE_PARAM));
+                             ADDRESS_SPACE_PARAM));
         SDValue srcAddr =
             DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
                         DAG.getConstant(offsets[parti], dl, PtrVT));
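
Note on the srcValue pattern above: the null pointer constant is never
dereferenced; it exists only so MachinePointerInfo can record that the load
touches the .param address space. A sketch of the idiom, assuming
ADDRESS_SPACE_PARAM from MCTargetDesc/NVPTXBaseInfo.h and hypothetical
surrounding variables:

    // Tag the memory operand as living in param space for later passes.
    Value *ParamTag = Constant::getNullValue(
        PointerType::get(EltTy, ADDRESS_SPACE_PARAM));
    SDValue Load = DAG.getLoad(VT, dl, Chain, Addr,
                               MachinePointerInfo(ParamTag));
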
@@ -2242,7 +2261,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
       if (NumElts == 1) {
         // We only have one element, so just directly load it
         Value *SrcValue = Constant::getNullValue(PointerType::get(
-            EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+            EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
         SDValue P = DAG.getLoad(
             EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
             DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())),
@@ -2260,7 +2279,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
         // f32,f32 = load ...
         EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
         Value *SrcValue = Constant::getNullValue(PointerType::get(
-            VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+            VecVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
         SDValue P = DAG.getLoad(
             VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
             DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
@@ -2301,7 +2320,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
         for (unsigned i = 0; i < NumElts; i += VecSize) {
           Value *SrcValue = Constant::getNullValue(
               PointerType::get(VecVT.getTypeForEVT(F->getContext()),
-                               llvm::ADDRESS_SPACE_PARAM));
+                               ADDRESS_SPACE_PARAM));
           SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
                                         DAG.getConstant(Ofst, dl, PtrVT));
           SDValue P = DAG.getLoad(
@@ -2335,7 +2354,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
       // If ABI, load from the param symbol
       SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
       Value *srcValue = Constant::getNullValue(PointerType::get(
-          ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+          ObjectVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
       SDValue p;
       if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
         ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
@@ -2424,7 +2443,6 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
                                       DAG.getVTList(MVT::Other), Ops,
                                       EltVT, MachinePointerInfo());
-
     } else if (NumElts == 2) {
       // V2 store
       SDValue StoreVal0 = OutVals[0];
@@ -2558,7 +2576,6 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
 }

-
 void NVPTXTargetLowering::LowerAsmOperandForConstraint(
     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
     SelectionDAG &DAG) const {
@@ -3306,7 +3323,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
     Info.memVT = getValueType(DL, I.getType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = true;
     Info.align = 0;
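
Note: these assignments fill in TargetLowering::IntrinsicInfo, whose vol field
is declared bool, so writing false (rather than 0) matches the declared type.
Rough shape of the struct at this revision, paraphrased from
llvm/Target/TargetLowering.h rather than copied verbatim:

    struct IntrinsicInfo {
      unsigned opc;         // target opcode or ISD::INTRINSIC_* opcode
      EVT memVT;            // type of the memory that is touched
      const Value *ptrVal;  // underlying pointer value, if known
      int offset;           // offset from ptrVal
      unsigned align;       // alignment in bytes
      bool vol;             // volatile access?
      bool readMem;         // reads memory
      bool writeMem;        // writes memory
    };
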
@@ -3326,7 +3343,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
     Info.memVT = getValueType(DL, I.getType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
@@ -3347,7 +3364,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
     Info.memVT = getValueType(DL, I.getType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
@@ -3410,17 +3427,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
   case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
   case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
-  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: {
+  case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32:
     Info.opc = getOpcForTextureInstr(Intrinsic);
     Info.memVT = MVT::v4f32;
     Info.ptrVal = nullptr;
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = 16;
     return true;
-  }
+
   case Intrinsic::nvvm_tex_1d_v4s32_s32:
   case Intrinsic::nvvm_tex_1d_v4s32_f32:
   case Intrinsic::nvvm_tex_1d_level_v4s32_f32:
@@ -3532,17 +3549,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32:
   case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32:
   case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32:
-  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: {
+  case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32:
     Info.opc = getOpcForTextureInstr(Intrinsic);
     Info.memVT = MVT::v4i32;
     Info.ptrVal = nullptr;
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = 16;
     return true;
-  }
+
   case Intrinsic::nvvm_suld_1d_i8_clamp:
   case Intrinsic::nvvm_suld_1d_v2i8_clamp:
   case Intrinsic::nvvm_suld_1d_v4i8_clamp:
@@ -3587,17 +3604,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_suld_2d_array_v4i8_zero:
   case Intrinsic::nvvm_suld_3d_i8_zero:
   case Intrinsic::nvvm_suld_3d_v2i8_zero:
-  case Intrinsic::nvvm_suld_3d_v4i8_zero: {
+  case Intrinsic::nvvm_suld_3d_v4i8_zero:
     Info.opc = getOpcForSurfaceInstr(Intrinsic);
     Info.memVT = MVT::i8;
     Info.ptrVal = nullptr;
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = 16;
     return true;
-  }
+
   case Intrinsic::nvvm_suld_1d_i16_clamp:
   case Intrinsic::nvvm_suld_1d_v2i16_clamp:
   case Intrinsic::nvvm_suld_1d_v4i16_clamp:
@@ -3642,17 +3659,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_suld_2d_array_v4i16_zero:
   case Intrinsic::nvvm_suld_3d_i16_zero:
   case Intrinsic::nvvm_suld_3d_v2i16_zero:
-  case Intrinsic::nvvm_suld_3d_v4i16_zero: {
+  case Intrinsic::nvvm_suld_3d_v4i16_zero:
     Info.opc = getOpcForSurfaceInstr(Intrinsic);
     Info.memVT = MVT::i16;
     Info.ptrVal = nullptr;
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = 16;
     return true;
-  }
+
   case Intrinsic::nvvm_suld_1d_i32_clamp:
   case Intrinsic::nvvm_suld_1d_v2i32_clamp:
   case Intrinsic::nvvm_suld_1d_v4i32_clamp:
@@ -3697,17 +3714,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_suld_2d_array_v4i32_zero:
   case Intrinsic::nvvm_suld_3d_i32_zero:
   case Intrinsic::nvvm_suld_3d_v2i32_zero:
-  case Intrinsic::nvvm_suld_3d_v4i32_zero: {
+  case Intrinsic::nvvm_suld_3d_v4i32_zero:
     Info.opc = getOpcForSurfaceInstr(Intrinsic);
     Info.memVT = MVT::i32;
     Info.ptrVal = nullptr;
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = 16;
     return true;
-  }
+
   case Intrinsic::nvvm_suld_1d_i64_clamp:
   case Intrinsic::nvvm_suld_1d_v2i64_clamp:
   case Intrinsic::nvvm_suld_1d_array_i64_clamp:
@@ -3737,18 +3754,17 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_suld_2d_array_i64_zero:
   case Intrinsic::nvvm_suld_2d_array_v2i64_zero:
   case Intrinsic::nvvm_suld_3d_i64_zero:
-  case Intrinsic::nvvm_suld_3d_v2i64_zero: {
+  case Intrinsic::nvvm_suld_3d_v2i64_zero:
     Info.opc = getOpcForSurfaceInstr(Intrinsic);
     Info.memVT = MVT::i64;
     Info.ptrVal = nullptr;
     Info.offset = 0;
-    Info.vol = 0;
+    Info.vol = false;
     Info.readMem = true;
     Info.writeMem = false;
     Info.align = 16;
     return true;
   }
-  }
   return false;
 }

@@ -3760,7 +3776,6 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
                                                 unsigned AS) const {
-
   // AddrMode - This represents an addressing mode of:
   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
   //
@@ -4059,7 +4074,7 @@ static SDValue PerformANDCombine(SDNode *N,
   }

   bool AddTo = false;
-  if (AExt.getNode() != 0) {
+  if (AExt.getNode() != nullptr) {
     // Re-insert the ext as a zext.
     Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                           AExt.getValueType(), Val);
@@ -4204,7 +4219,6 @@ static bool IsMulWideOperandDemotable(SDValue Op,
 static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
                                         unsigned OptSize,
                                         bool &IsSigned) {
-
   OperandSignedness LHSSign;

   // The LHS operand must be a demotable op