浏览代码

[ARM] Introduce separate features for FP registers

The MVE extension in Arm v8.1-M permits the use of some move, load and
store instructions which access the FP registers, even if there's no
actual FP support in the processor (in particular, if you have the
integer-only version of MVE).

Therefore, we need separate subtarget features to condition those
instructions on, which are implied by both FP and MVE but are not part
of either.

Patch mostly by Simon Tatham.

Differential Revision: https://reviews.llvm.org/D60694


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362088 91177308-0d34-0410-b5e6-96231b3b80d8
Sjoerd Meijer 6 年之前
父节点
当前提交
1c34d83f52

+ 22 - 3
lib/Target/ARM/ARM.td

@@ -32,9 +32,26 @@ def ModeSoftFloat         : SubtargetFeature<"soft-float","UseSoftFloat",
 //
 //
 
 
 // Floating Point, HW Division and Neon Support
 // Floating Point, HW Division and Neon Support
+
+// FP loads/stores/moves, shared between VFP and MVE (even in the integer-only
+// version).
+def FeatureFPRegs         : SubtargetFeature<"fpregs", "HasFPRegs", "true",
+                                             "Enable FP registers">;
+
+// 16-bit FP loads/stores/moves, shared between VFP (with the v8.2A FP16
+// extension) and MVE (even in the integer-only version).
+def FeatureFPRegs16       : SubtargetFeature<"fpregs16", "HasFPRegs16", "true",
+                                             "Enable 16-bit FP registers",
+                                             [FeatureFPRegs]>;
+
+def FeatureFPRegs64       : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
+                                             "Enable 64-bit FP registers",
+                                             [FeatureFPRegs]>;
+
 def FeatureFP64           : SubtargetFeature<"fp64", "HasFP64", "true",
 def FeatureFP64           : SubtargetFeature<"fp64", "HasFP64", "true",
                                              "Floating point unit supports "
                                              "Floating point unit supports "
-                                             "double precision">;
+                                             "double precision",
+                                             [FeatureFPRegs64]>;
 
 
 def FeatureD32            : SubtargetFeature<"d32", "HasD32", "true",
 def FeatureD32            : SubtargetFeature<"d32", "HasD32", "true",
                                              "Extend FP to 32 double registers">;
                                              "Extend FP to 32 double registers">;
@@ -63,7 +80,9 @@ multiclass VFPver<string name, string query, string description,
         !cast<SubtargetFeature>(NAME # "_SP")]>;
         !cast<SubtargetFeature>(NAME # "_SP")]>;
 }
 }
 
 
-defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions">;
+defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions",
+                         [], [FeatureFPRegs]>;
+
 defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
 defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
                          [FeatureVFP2]>;
                          [FeatureVFP2]>;
 
 
@@ -84,7 +103,7 @@ defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
 def FeatureFullFP16       : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
 def FeatureFullFP16       : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
                                              "Enable full half-precision "
                                              "Enable full half-precision "
                                              "floating point",
                                              "floating point",
-                                             [FeatureFPARMv8_D16_SP]>;
+                                             [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
 
 
 def FeatureFP16FML        : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
 def FeatureFP16FML        : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
                                              "Enable full half-precision "
                                              "Enable full half-precision "

+ 1 - 1
lib/Target/ARM/ARMInstrNEON.td

@@ -6179,7 +6179,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                           IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                           IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                           [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                           [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                            imm:$lane))]>,
                                            imm:$lane))]>,
-                Requires<[HasVFP2, HasFastVGETLNi32]> {
+                Requires<[HasFPRegs, HasFastVGETLNi32]> {
   let Inst{21} = lane{0};
   let Inst{21} = lane{0};
 }
 }
 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

+ 31 - 14
lib/Target/ARM/ARMInstrVFP.td

@@ -141,11 +141,13 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 
 def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
 def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
                  IIC_fpLoad64, "vldr", "\t$Dd, $addr",
                  IIC_fpLoad64, "vldr", "\t$Dd, $addr",
-                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
+                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>,
+            Requires<[HasFPRegs]>;
 
 
 def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
 def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
                  IIC_fpLoad32, "vldr", "\t$Sd, $addr",
                  IIC_fpLoad32, "vldr", "\t$Sd, $addr",
-                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
+                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   // pipelines.
   let D = VFPNeonDomain;
   let D = VFPNeonDomain;
@@ -155,17 +157,19 @@ let isUnpredicable = 1 in
 def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
 def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
                  IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
                  IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
                  [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
                  [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 
 } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
 } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
 
 
 def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
 def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
                  IIC_fpStore64, "vstr", "\t$Dd, $addr",
                  IIC_fpStore64, "vstr", "\t$Dd, $addr",
-                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
+                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>,
+            Requires<[HasFPRegs]>;
 
 
 def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
 def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
                  IIC_fpStore32, "vstr", "\t$Sd, $addr",
                  IIC_fpStore32, "vstr", "\t$Sd, $addr",
-                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
+                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   // pipelines.
   let D = VFPNeonDomain;
   let D = VFPNeonDomain;
@@ -175,7 +179,7 @@ let isUnpredicable = 1 in
 def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
 def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
                  IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
                  IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
                  [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
                  [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 
 //===----------------------------------------------------------------------===//
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
 //  Load / store multiple Instructions.
@@ -183,6 +187,7 @@ def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
 
 
 multiclass vfp_ldst_mult<string asm, bit L_bit,
 multiclass vfp_ldst_mult<string asm, bit L_bit,
                          InstrItinClass itin, InstrItinClass itin_upd> {
                          InstrItinClass itin, InstrItinClass itin_upd> {
+  let Predicates = [HasFPRegs] in {
   // Double Precision
   // Double Precision
   def DIA :
   def DIA :
     AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
     AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -250,6 +255,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     // VFP pipelines.
     // VFP pipelines.
     let D = VFPNeonDomain;
     let D = VFPNeonDomain;
   }
   }
+  }
 }
 }
 
 
 let hasSideEffects = 0 in {
 let hasSideEffects = 0 in {
@@ -318,6 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
 // However, there is no UAL syntax for them, so we keep them around for
 // However, there is no UAL syntax for them, so we keep them around for
 // (dis)assembly only.
 // (dis)assembly only.
 multiclass vfp_ldstx_mult<string asm, bit L_bit> {
 multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+  let Predicates = [HasFPRegs] in {
   // Unknown precision
   // Unknown precision
   def XIA :
   def XIA :
     AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
     AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -340,6 +347,7 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> {
     let Inst{21}    = 1;            // Writeback
     let Inst{21}    = 1;            // Writeback
     let Inst{20}    = L_bit;
     let Inst{20}    = L_bit;
   }
   }
+  }
 }
 }
 
 
 defm FLDM : vfp_ldstx_mult<"fldm", 1>;
 defm FLDM : vfp_ldstx_mult<"fldm", 1>;
@@ -1031,11 +1039,13 @@ let hasSideEffects = 0 in {
 let isMoveReg = 1 in {
 let isMoveReg = 1 in {
 def VMOVD  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
 def VMOVD  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+             Requires<[HasFPRegs64]>;
 
 
 def VMOVS  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
 def VMOVS  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
                   (outs SPR:$Sd), (ins SPR:$Sm),
-                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+             Requires<[HasFPRegs]>;
 } // isMoveReg
 } // isMoveReg
 
 
 let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
 let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
@@ -1060,6 +1070,7 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
                       (outs GPR:$Rt), (ins SPR:$Sn),
                       (outs GPR:$Rt), (ins SPR:$Sn),
                       IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
                       IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
                       [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
                       [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+             Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   // Instruction operands.
   bits<4> Rt;
   bits<4> Rt;
@@ -1083,7 +1094,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
                       (outs SPR:$Sn), (ins GPR:$Rt),
                       (outs SPR:$Sn), (ins GPR:$Rt),
                       IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
                       IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
                       [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
                       [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
-             Requires<[HasVFP2, UseVMOVSR]>,
+             Requires<[HasFPRegs, UseVMOVSR]>,
              Sched<[WriteFPMOV]> {
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   // Instruction operands.
   bits<5> Sn;
   bits<5> Sn;
@@ -1109,6 +1120,7 @@ def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
                         (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
                         (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
                         IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
                         IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
                  [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
                  [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
                Sched<[WriteFPMOV]> {
   // Instruction operands.
   // Instruction operands.
   bits<5> Dm;
   bits<5> Dm;
@@ -1137,6 +1149,7 @@ def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
                       (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
                       (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
                  IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
                  IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
                  [/* For disassembly only; pattern left blank */]>,
                  [/* For disassembly only; pattern left blank */]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
                Sched<[WriteFPMOV]> {
   bits<5> src1;
   bits<5> src1;
   bits<4> Rt;
   bits<4> Rt;
@@ -1164,6 +1177,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
                       (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
                       (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
                       IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
                       IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
                       [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
                       [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   // Instruction operands.
   bits<5> Dm;
   bits<5> Dm;
@@ -1208,6 +1222,7 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                      (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
                      (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
                 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                 [/* For disassembly only; pattern left blank */]>,
                 [/* For disassembly only; pattern left blank */]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   // Instruction operands.
   bits<5> dst1;
   bits<5> dst1;
@@ -1234,7 +1249,7 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
                       (outs GPR:$Rt), (ins HPR:$Sn),
                       (outs GPR:$Rt), (ins HPR:$Sn),
                       IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
                       IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
                       [(set GPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
                       [(set GPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
-             Requires<[HasFullFP16]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   // Instruction operands.
   bits<4> Rt;
   bits<4> Rt;
@@ -1256,7 +1271,7 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
                       (outs HPR:$Sn), (ins GPR:$Rt),
                       (outs HPR:$Sn), (ins GPR:$Rt),
                       IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
                       IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
                       [(set HPR:$Sn, (arm_vmovhr GPR:$Rt))]>,
                       [(set HPR:$Sn, (arm_vmovhr GPR:$Rt))]>,
-             Requires<[HasFullFP16]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   // Instruction operands.
   bits<5> Sn;
   bits<5> Sn;
@@ -2286,13 +2301,14 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
 
 
 // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
 // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
 // to APSR.
 // to APSR.
-let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
+    Rt = 0b1111 /* apsr_nzcv */ in
 def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
 def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
                         "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
                         "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
 
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
  // Application level FPSCR -> GPR
  // Application level FPSCR -> GPR
- let hasSideEffects = 1, Uses = [FPSCR] in
+ let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
  def VMRS :  MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
  def VMRS :  MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
                         "vmrs", "\t$Rt, fpscr",
                         "vmrs", "\t$Rt, fpscr",
                         [(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;
                         [(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;
@@ -2341,6 +2357,7 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
 
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
  let Defs = [FPSCR] in {
  let Defs = [FPSCR] in {
+   let Predicates = [HasFPRegs] in
    // Application level GPR -> FPSCR
    // Application level GPR -> FPSCR
    def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src),
    def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src),
                        "vmsr", "\tfpscr, $src",
                        "vmsr", "\tfpscr, $src",
@@ -2474,7 +2491,7 @@ def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
 def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
 def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
 
 
 
 
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : InstAlias<"fmstat${p}", (FMSTAT pred:$p), 0>, Requires<[HasFPRegs]>;
 def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
 def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
                     (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
                     (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
 def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
 def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",

+ 9 - 0
lib/Target/ARM/ARMPredicates.td

@@ -26,6 +26,15 @@ def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
 def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
 def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
                                  AssemblerPredicate<"HasV8MMainlineOps",
                                  AssemblerPredicate<"HasV8MMainlineOps",
                                                     "armv8m.main">;
                                                     "armv8m.main">;
+def HasFPRegs        : Predicate<"Subtarget->hasFPRegs()">,
+                                 AssemblerPredicate<"FeatureFPRegs",
+                                                    "fp registers">;
+def HasFPRegs16      : Predicate<"Subtarget->hasFPRegs16()">,
+                                 AssemblerPredicate<"FeatureFPRegs16",
+                                                    "16-bit fp registers">;
+def HasFPRegs64      : Predicate<"Subtarget->hasFPRegs64()">,
+                                 AssemblerPredicate<"FeatureFPRegs64",
+                                                    "64-bit fp registers">;
 def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
 def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
                                  AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
                                  AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
 def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
 def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;

+ 6 - 0
lib/Target/ARM/ARMSubtarget.h

@@ -165,6 +165,9 @@ protected:
   bool HasVFPv4 = false;
   bool HasVFPv4 = false;
   bool HasFPARMv8 = false;
   bool HasFPARMv8 = false;
   bool HasNEON = false;
   bool HasNEON = false;
+  bool HasFPRegs = false;
+  bool HasFPRegs16 = false;
+  bool HasFPRegs64 = false;
 
 
   /// Versions of the VFP flags restricted to single precision, or to
   /// Versions of the VFP flags restricted to single precision, or to
   /// 16 d-registers, or both.
   /// 16 d-registers, or both.
@@ -566,6 +569,9 @@ public:
   bool hasV8_5aOps() const { return HasV8_5aOps; }
   bool hasV8_5aOps() const { return HasV8_5aOps; }
   bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
   bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
   bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
   bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+  bool hasFPRegs() const { return HasFPRegs; }
+  bool hasFPRegs16() const { return HasFPRegs16; }
+  bool hasFPRegs64() const { return HasFPRegs64; }
 
 
   /// @{
   /// @{
   /// These functions are obsolete, please consider adding subtarget features
   /// These functions are obsolete, please consider adding subtarget features

+ 10 - 10
test/MC/ARM/fullfp16-neg.s

@@ -165,25 +165,25 @@
   vldr.16 s2, [pc, #510]
   vldr.16 s2, [pc, #510]
   vldr.16 s3, [pc, #-510]
   vldr.16 s3, [pc, #-510]
   vldr.16 s4, [r4, #-18]
   vldr.16 s4, [r4, #-18]
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
 
 
 
 
   vstr.16 s1, [pc, #6]
   vstr.16 s1, [pc, #6]
   vstr.16 s2, [pc, #510]
   vstr.16 s2, [pc, #510]
   vstr.16 s3, [pc, #-510]
   vstr.16 s3, [pc, #-510]
   vstr.16 s4, [r4, #-18]
   vstr.16 s4, [r4, #-18]
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
 
 
   vmov.f16 s0, #1.0
   vmov.f16 s0, #1.0
 @ CHECK: instruction requires: full half-float
 @ CHECK: instruction requires: full half-float
 
 
   vmov.f16 s1, r2
   vmov.f16 s1, r2
   vmov.f16 r3, s4
   vmov.f16 r3, s4
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers

+ 133 - 0
test/MC/ARM/mve-fp-registers.s

@@ -0,0 +1,133 @@
+// Some simple operations on S, D and Q registers (loads, stores and moves) are
+// also available in MVE, even in the integer-only version. Some of these
+// instructions (operating on D or Q registers, or FP16 values) are only
+// available for certain targets.
+
+// Note that it's not always obvious which instructions are available, for
+// example several instructions operating on D registers are available for
+// single-precision only FPUs.
+
+// All of these instructions are rejected if no VFP or MVE features are
+// present.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding 2>%t < %s
+// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP32 --check-prefix=NOFP64
+
+// VFP and NEON implementations by default have FP32 and FP64, but not FP16.
+// The VFPv3 FP16 extension just added conversion instructions, which we don't
+// care about here.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp2 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+neon 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+
+// The v8.2A FP16 extension added loads, stores and moves for FP16.
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+fullfp16 < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
+
+// M-profile FPUs (e.g. Cortex-M4/M7/M33) do not have FP16 instructions, and
+// the FP64 instructions are optional. They are also limited to 16 D registers,
+// but we don't test that here.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4d16sp 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32
+// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP64
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4,-d32 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+
+vldmia  r0, {d0}
+# FP32: vldmia  r0, {d0}               @ encoding: [0x90,0xec,0x02,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstmia  r0, {d0}
+# FP32: vstmia  r0, {d0}                @ encoding: [0x80,0xec,0x02,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldmia  r0, {s0}
+# FP32: vldmia  r0, {s0}                @ encoding: [0x90,0xec,0x01,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstmia  r0, {s0}
+# FP32: vstmia  r0, {s0}                @ encoding: [0x80,0xec,0x01,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+fldmdbx r0!, {d0}
+# FP32: fldmdbx r0!, {d0}               @ encoding: [0x30,0xed,0x03,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+fstmiax r0, {d0}
+# FP32: fstmiax r0, {d0}                @ encoding: [0x80,0xec,0x03,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldr.16 s0, [r0]
+# FP16: vldr.16 s0, [r0]                @ encoding: [0x90,0xed,0x00,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vldr s0, [r0]
+# FP32: vldr    s0, [r0]                @ encoding: [0x90,0xed,0x00,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldr d0, [r0]
+# FP32: vldr    d0, [r0]                @ encoding: [0x90,0xed,0x00,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstr.16 s0, [r0]
+# FP16: vstr.16 s0, [r0]                @ encoding: [0x80,0xed,0x00,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vstr s0, [r0]
+# FP32: vstr    s0, [r0]                @ encoding: [0x80,0xed,0x00,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstr d0, [r0]
+# FP32: vstr    d0, [r0]                @ encoding: [0x80,0xed,0x00,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f16 r0, s0
+# FP16: vmov.f16        r0, s0          @ encoding: [0x10,0xee,0x10,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vmov.f16 s0, r0
+# FP16: vmov.f16        s0, r0          @ encoding: [0x00,0xee,0x10,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vmov s0, r0
+# FP32: vmov    s0, r0                  @ encoding: [0x00,0xee,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, s0
+# FP32: vmov    r0, s0                  @ encoding: [0x10,0xee,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, r1, d0
+# FP32: vmov    r0, r1, d0              @ encoding: [0x51,0xec,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov d0, r0, r1
+# FP32: vmov    d0, r0, r1              @ encoding: [0x41,0xec,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, r1, s0, s1
+# FP32: vmov    r0, r1, s0, s1          @ encoding: [0x51,0xec,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov s0, s1, r0, r1
+# FP32: vmov    s0, s1, r0, r1          @ encoding: [0x41,0xec,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f32 s0, s1
+# FP32: vmov.f32        s0, s1          @ encoding: [0xb0,0xee,0x60,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f64 d0, d1
+# FP64: vmov.f64        d0, d1          @ encoding: [0xb0,0xee,0x41,0x0b]
+# NOFP64: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 64-bit fp registers
+
+vmov.32 r0, d1[0]
+# FP32: vmov.32 r0, d1[0]               @ encoding: [0x11,0xee,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires: fp registers
+
+vmrs apsr_nzcv, fpscr
+# FP32: vmrs    APSR_nzcv, fpscr        @ encoding: [0xf1,0xee,0x10,0xfa]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers

+ 2 - 2
test/MC/ARM/single-precision-fp.s

@@ -1,4 +1,4 @@
-@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=-fp64,-neon 2> %t > %t2
+@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=-fp64,-fpregs64,-neon 2> %t > %t2
 @ RUN:     FileCheck %s < %t --check-prefix=CHECK-ERRORS
 @ RUN:     FileCheck %s < %t --check-prefix=CHECK-ERRORS
 @ RUN:     FileCheck %s < %t2
 @ RUN:     FileCheck %s < %t2
 
 
@@ -72,7 +72,7 @@
         @ FIXME: overlapping aliases and a probable TableGen indeterminacy mean
         @ FIXME: overlapping aliases and a probable TableGen indeterminacy mean
         @ that the actual reason can vary by platform.
         @ that the actual reason can vary by platform.
         vmov.f64 d11, d10
         vmov.f64 d11, d10
-@ CHECK-ERRORS: instruction requires: NEON
+@ CHECK-ERRORS: instruction requires: 64-bit fp registers
 @ CHECK-ERRORS-NEXT: vmov.f64 d11, d10
 @ CHECK-ERRORS-NEXT: vmov.f64 d11, d10
 
 
         vcvt.f64.s32 d9, s8
         vcvt.f64.s32 d9, s8

+ 6 - 6
test/MC/ARM/vmrs_vmsr.s

@@ -103,10 +103,10 @@
 // ERROR-V8M: invalid operand for instruction
 // ERROR-V8M: invalid operand for instruction
 // ERROR-V8M: invalid operand for instruction
 // ERROR-V8M: invalid operand for instruction
 
 
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
@@ -172,9 +172,9 @@
 // ERROR-V8M: operand must be a register in range [r0, r14]
 // ERROR-V8M: operand must be a register in range [r0, r14]
 
 
 // ERROR-NOVFP: invalid instruction
 // ERROR-NOVFP: invalid instruction
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: invalid instruction
 // ERROR-NOVFP: invalid instruction
 // ERROR-NOVFP: invalid instruction
 // ERROR-NOVFP: invalid instruction