Переглянути джерело

[x86] fix cost of SINT_TO_FP for i32 --> float (PR21356, PR28434)

This is "cvtdq2ps", which does not appear to be particularly slow on any CPU
according to Agner's tables. Choosing "5" as the cost here, as suggested in:
https://llvm.org/bugs/show_bug.cgi?id=21356
...but it seems very conservative given that the instruction is fully pipelined,
and I think these costs are supposed to model throughput.

Note that related costs are also most likely too high, but this fixes PR21356
and partly fixes PR28434.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274658 91177308-0d34-0410-b5e6-96231b3b80d8
Sanjay Patel 9 років тому
батько
коміт
368e7e3ad1

+ 1 - 1
lib/Target/X86/X86TargetTransformInfo.cpp

@@ -752,7 +752,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
-    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
     { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
     { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },

+ 6 - 6
test/Analysis/CostModel/X86/sitofp.ll

@@ -341,7 +341,7 @@ define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
 
 
 define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
 define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
   ; SSE2-LABEL: sitofpv4i8v4float
   ; SSE2-LABEL: sitofpv4i8v4float
-  ; SSE2: cost of 15 {{.*}} sitofp
+  ; SSE2: cost of 5 {{.*}} sitofp
   ;
   ;
   ; AVX1-LABEL: sitofpv4i8v4float
   ; AVX1-LABEL: sitofpv4i8v4float
   ; AVX1: cost of 3 {{.*}} sitofp
   ; AVX1: cost of 3 {{.*}} sitofp
@@ -421,7 +421,7 @@ define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
 
 
 define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
 define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
   ; SSE2-LABEL: sitofpv4i16v4float
   ; SSE2-LABEL: sitofpv4i16v4float
-  ; SSE2: cost of 15 {{.*}} sitofp
+  ; SSE2: cost of 5 {{.*}} sitofp
   ;
   ;
   ; AVX1-LABEL: sitofpv4i16v4float
   ; AVX1-LABEL: sitofpv4i16v4float
   ; AVX1: cost of 3 {{.*}} sitofp
   ; AVX1: cost of 3 {{.*}} sitofp
@@ -501,7 +501,7 @@ define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
 
 
 define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
 define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
   ; SSE2-LABEL: sitofpv4i32v4float
   ; SSE2-LABEL: sitofpv4i32v4float
-  ; SSE2: cost of 15 {{.*}} sitofp
+  ; SSE2: cost of 5 {{.*}} sitofp
   ;
   ;
   ; AVX1-LABEL: sitofpv4i32v4float
   ; AVX1-LABEL: sitofpv4i32v4float
   ; AVX1: cost of 1 {{.*}} sitofp
   ; AVX1: cost of 1 {{.*}} sitofp
@@ -517,7 +517,7 @@ define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
 
 
 define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
 define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
   ; SSE2-LABEL: sitofpv8i32v8float
   ; SSE2-LABEL: sitofpv8i32v8float
-  ; SSE2: cost of 30 {{.*}} sitofp
+  ; SSE2: cost of 10 {{.*}} sitofp
   ;
   ;
   ; AVX1-LABEL: sitofpv8i32v8float
   ; AVX1-LABEL: sitofpv8i32v8float
   ; AVX1: cost of 1 {{.*}} sitofp
   ; AVX1: cost of 1 {{.*}} sitofp
@@ -533,7 +533,7 @@ define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
 
 
 define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
 define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
   ; SSE2-LABEL: sitofpv16i32v16float
   ; SSE2-LABEL: sitofpv16i32v16float
-  ; SSE2: cost of 60 {{.*}} sitofp
+  ; SSE2: cost of 20 {{.*}} sitofp
   ;
   ;
   ; AVX1-LABEL: sitofpv16i32v16float
   ; AVX1-LABEL: sitofpv16i32v16float
   ; AVX1: cost of 3 {{.*}} sitofp
   ; AVX1: cost of 3 {{.*}} sitofp
@@ -549,7 +549,7 @@ define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
 
 
 define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
 define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
   ; SSE2-LABEL: sitofpv32i32v32float
   ; SSE2-LABEL: sitofpv32i32v32float
-  ; SSE2: cost of 120 {{.*}} sitofp
+  ; SSE2: cost of 40 {{.*}} sitofp
   ;
   ;
   ; AVX1-LABEL: sitofpv32i32v32float
   ; AVX1-LABEL: sitofpv32i32v32float
   ; AVX1: cost of 7 {{.*}} sitofp
   ; AVX1: cost of 7 {{.*}} sitofp