|
@@ -1186,17 +1186,23 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-static llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
|
|
|
|
- switch (type) {
|
|
|
|
- default: break;
|
|
|
|
- case 0:
|
|
|
|
- case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
|
|
|
|
- case 6:
|
|
|
|
- case 7:
|
|
|
|
- case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
|
|
|
|
- case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
|
|
|
|
- case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
|
|
|
|
- case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
|
|
|
|
|
|
+static llvm::VectorType *GetNeonType(LLVMContext &C, NeonTypeFlags Type) {
|
|
|
|
+ int IsQuad = Type.isQuad();
|
|
|
|
+ switch (Type.getEltType()) {
|
|
|
|
+ default: break;
|
|
|
|
+ case NeonTypeFlags::Int8:
|
|
|
|
+ case NeonTypeFlags::Poly8:
|
|
|
|
+ return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << IsQuad);
|
|
|
|
+ case NeonTypeFlags::Int16:
|
|
|
|
+ case NeonTypeFlags::Poly16:
|
|
|
|
+ case NeonTypeFlags::Float16:
|
|
|
|
+ return llvm::VectorType::get(llvm::Type::getInt16Ty(C), 4 << IsQuad);
|
|
|
|
+ case NeonTypeFlags::Int32:
|
|
|
|
+ return llvm::VectorType::get(llvm::Type::getInt32Ty(C), 2 << IsQuad);
|
|
|
|
+ case NeonTypeFlags::Int64:
|
|
|
|
+ return llvm::VectorType::get(llvm::Type::getInt64Ty(C), 1 << IsQuad);
|
|
|
|
+ case NeonTypeFlags::Float32:
|
|
|
|
+ return llvm::VectorType::get(llvm::Type::getFloatTy(C), 2 << IsQuad);
|
|
};
|
|
};
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
@@ -1363,14 +1369,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
|
|
}
|
|
}
|
|
|
|
|
|
// Determine the type of this overloaded NEON intrinsic.
|
|
// Determine the type of this overloaded NEON intrinsic.
|
|
- unsigned type = Result.getZExtValue();
|
|
|
|
- bool usgn = type & 0x08;
|
|
|
|
- bool quad = type & 0x10;
|
|
|
|
- bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
|
|
|
|
- (void)poly; // Only used in assert()s.
|
|
|
|
|
|
+ NeonTypeFlags Type(Result.getZExtValue());
|
|
|
|
+ bool usgn = Type.isUnsigned();
|
|
|
|
+ bool quad = Type.isQuad();
|
|
bool rightShift = false;
|
|
bool rightShift = false;
|
|
|
|
|
|
- llvm::VectorType *VTy = GetNeonType(getLLVMContext(), type & 0x7, quad);
|
|
|
|
|
|
+ llvm::VectorType *VTy = GetNeonType(getLLVMContext(), Type);
|
|
llvm::Type *Ty = VTy;
|
|
llvm::Type *Ty = VTy;
|
|
if (!Ty)
|
|
if (!Ty)
|
|
return 0;
|
|
return 0;
|
|
@@ -1429,34 +1433,43 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
|
|
return EmitNeonCall(F, Ops, "vcnt");
|
|
return EmitNeonCall(F, Ops, "vcnt");
|
|
}
|
|
}
|
|
case ARM::BI__builtin_neon_vcvt_f16_v: {
|
|
case ARM::BI__builtin_neon_vcvt_f16_v: {
|
|
- assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f16_v builtin");
|
|
|
|
|
|
+ assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
|
|
|
|
+ "unexpected vcvt_f16_v builtin");
|
|
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
|
|
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
|
|
return EmitNeonCall(F, Ops, "vcvt");
|
|
return EmitNeonCall(F, Ops, "vcvt");
|
|
}
|
|
}
|
|
case ARM::BI__builtin_neon_vcvt_f32_f16: {
|
|
case ARM::BI__builtin_neon_vcvt_f32_f16: {
|
|
- assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f32_f16 builtin");
|
|
|
|
|
|
+ assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
|
|
|
|
+ "unexpected vcvt_f32_f16 builtin");
|
|
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
|
|
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
|
|
return EmitNeonCall(F, Ops, "vcvt");
|
|
return EmitNeonCall(F, Ops, "vcvt");
|
|
}
|
|
}
|
|
case ARM::BI__builtin_neon_vcvt_f32_v:
|
|
case ARM::BI__builtin_neon_vcvt_f32_v:
|
|
- case ARM::BI__builtin_neon_vcvtq_f32_v: {
|
|
|
|
|
|
+ case ARM::BI__builtin_neon_vcvtq_f32_v:
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
- Ty = GetNeonType(getLLVMContext(), 4, quad);
|
|
|
|
|
|
+ Ty = GetNeonType(getLLVMContext(),
|
|
|
|
+ NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
|
|
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
|
|
return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
|
|
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
|
|
: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
|
|
- }
|
|
|
|
case ARM::BI__builtin_neon_vcvt_s32_v:
|
|
case ARM::BI__builtin_neon_vcvt_s32_v:
|
|
case ARM::BI__builtin_neon_vcvt_u32_v:
|
|
case ARM::BI__builtin_neon_vcvt_u32_v:
|
|
case ARM::BI__builtin_neon_vcvtq_s32_v:
|
|
case ARM::BI__builtin_neon_vcvtq_s32_v:
|
|
case ARM::BI__builtin_neon_vcvtq_u32_v: {
|
|
case ARM::BI__builtin_neon_vcvtq_u32_v: {
|
|
- Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(getLLVMContext(), 4, quad));
|
|
|
|
|
|
+ llvm::Type *FloatTy =
|
|
|
|
+ GetNeonType(getLLVMContext(),
|
|
|
|
+ NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
|
|
|
|
+ Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
|
|
return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
|
|
return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
|
|
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
|
|
: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
|
|
}
|
|
}
|
|
case ARM::BI__builtin_neon_vcvt_n_f32_v:
|
|
case ARM::BI__builtin_neon_vcvt_n_f32_v:
|
|
case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
|
|
case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
|
|
- llvm::Type *Tys[2] = { GetNeonType(getLLVMContext(), 4, quad), Ty };
|
|
|
|
- Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
|
|
|
|
|
|
+ llvm::Type *FloatTy =
|
|
|
|
+ GetNeonType(getLLVMContext(),
|
|
|
|
+ NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
|
|
|
|
+ llvm::Type *Tys[2] = { FloatTy, Ty };
|
|
|
|
+ Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
|
|
|
|
+ : Intrinsic::arm_neon_vcvtfxs2fp;
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
|
return EmitNeonCall(F, Ops, "vcvt_n");
|
|
return EmitNeonCall(F, Ops, "vcvt_n");
|
|
}
|
|
}
|
|
@@ -1464,8 +1477,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
|
|
case ARM::BI__builtin_neon_vcvt_n_u32_v:
|
|
case ARM::BI__builtin_neon_vcvt_n_u32_v:
|
|
case ARM::BI__builtin_neon_vcvtq_n_s32_v:
|
|
case ARM::BI__builtin_neon_vcvtq_n_s32_v:
|
|
case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
|
|
case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
|
|
- llvm::Type *Tys[2] = { Ty, GetNeonType(getLLVMContext(), 4, quad) };
|
|
|
|
- Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
|
|
|
|
|
|
+ llvm::Type *FloatTy =
|
|
|
|
+ GetNeonType(getLLVMContext(),
|
|
|
|
+ NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
|
|
|
|
+ llvm::Type *Tys[2] = { Ty, FloatTy };
|
|
|
|
+ Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
|
|
|
|
+ : Intrinsic::arm_neon_vcvtfp2fxs;
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
|
Function *F = CGM.getIntrinsic(Int, Tys);
|
|
return EmitNeonCall(F, Ops, "vcvt_n");
|
|
return EmitNeonCall(F, Ops, "vcvt_n");
|
|
}
|
|
}
|
|
@@ -1656,12 +1673,12 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
|
|
}
|
|
}
|
|
case ARM::BI__builtin_neon_vmul_v:
|
|
case ARM::BI__builtin_neon_vmul_v:
|
|
case ARM::BI__builtin_neon_vmulq_v:
|
|
case ARM::BI__builtin_neon_vmulq_v:
|
|
- assert(poly && "vmul builtin only supported for polynomial types");
|
|
|
|
|
|
+ assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
|
|
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
|
|
Ops, "vmul");
|
|
Ops, "vmul");
|
|
case ARM::BI__builtin_neon_vmull_v:
|
|
case ARM::BI__builtin_neon_vmull_v:
|
|
Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
|
|
Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
|
|
- Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
|
|
|
|
|
|
+ Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
|
|
case ARM::BI__builtin_neon_vpadal_v:
|
|
case ARM::BI__builtin_neon_vpadal_v:
|
|
case ARM::BI__builtin_neon_vpadalq_v: {
|
|
case ARM::BI__builtin_neon_vpadalq_v: {
|