@@ -367,12 +367,12 @@ define <4 x i64> @test_mm256_broadcastsi128_si256_mem(<2 x i64>* %p0) {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_broadcastsi128_si256_mem:
 ; X64: # %bb.0:
 ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %a0 = load <2 x i64>, <2 x i64>* %p0
 %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ret <4 x i64> %res
@@ -766,7 +766,7 @@ define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i32gather_epi32:
 ; X64: # %bb.0:
@@ -774,7 +774,7 @@ define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i32 *%a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
@@ -789,12 +789,12 @@ define <2 x i64> @test_mm_mask_i32gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i32gather_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast i32 *%a1 to i8*
 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -812,7 +812,7 @@ define <4 x i64> @test_mm256_i32gather_epi32(i32 *%a0, <4 x i64> %a1) {
 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm0,2), %ymm1
 ; X86-NEXT: vmovdqa %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i32gather_epi32:
 ; X64: # %bb.0:
@@ -820,7 +820,7 @@ define <4 x i64> @test_mm256_i32gather_epi32(i32 *%a0, <4 x i64> %a1) {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1
 ; X64-NEXT: vmovdqa %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i32 *%a0 to i8*
 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
 %mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i32>
@@ -835,12 +835,12 @@ define <4 x i64> @test_mm256_mask_i32gather_epi32(<4 x i64> %a0, i32 *%a1, <4 x
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i32gather_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
 %arg1 = bitcast i32 *%a1 to i8*
 %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
@@ -858,7 +858,7 @@ define <2 x i64> @test_mm_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i32gather_epi64:
 ; X64: # %bb.0:
@@ -866,7 +866,7 @@ define <2 x i64> @test_mm_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64 *%a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> undef, i8* %arg0, <4 x i32> %arg1, <2 x i64> <i64 -1, i64 -1>, i8 2)
@@ -879,12 +879,12 @@ define <2 x i64> @test_mm_mask_i32gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i32gather_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast i64 *%a1 to i8*
 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <2 x i64> %a3, i8 2)
@@ -899,7 +899,7 @@ define <4 x i64> @test_mm256_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1
 ; X86-NEXT: vmovdqa %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i32gather_epi64:
 ; X64: # %bb.0:
@@ -907,7 +907,7 @@ define <4 x i64> @test_mm256_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1
 ; X64-NEXT: vmovdqa %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64 *%a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8* %arg0, <4 x i32> %arg1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
@@ -920,12 +920,12 @@ define <4 x i64> @test_mm256_mask_i32gather_epi64(<4 x i64> %a0, i64 *%a1, <2 x
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i32gather_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast i64 *%a1 to i8*
 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <4 x i64> %a3, i8 2)
@@ -940,7 +940,7 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovapd %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i32gather_pd:
 ; X64: # %bb.0:
@@ -948,7 +948,7 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovapd %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast double *%a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
@@ -964,12 +964,12 @@ define <2 x double> @test_mm_mask_i32gather_pd(<2 x double> %a0, double *%a1, <2
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i32gather_pd:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast double *%a1 to i8*
 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
 %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %arg1, <4 x i32> %arg2, <2 x double> %a3, i8 2)
@@ -984,7 +984,7 @@ define <4 x double> @test_mm256_i32gather_pd(double *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
 ; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1
 ; X86-NEXT: vmovapd %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i32gather_pd:
 ; X64: # %bb.0:
@@ -992,7 +992,7 @@ define <4 x double> @test_mm256_i32gather_pd(double *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
 ; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1
 ; X64-NEXT: vmovapd %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast double *%a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
@@ -1006,12 +1006,12 @@ define <4 x double> @test_mm256_mask_i32gather_pd(<4 x double> %a0, double *%a1,
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i32gather_pd:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast double *%a1 to i8*
 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
 %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %arg1, <4 x i32> %arg2, <4 x double> %a3, i8 2)
@@ -1026,7 +1026,7 @@ define <4 x float> @test_mm_i32gather_ps(float *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovaps %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i32gather_ps:
 ; X64: # %bb.0:
@@ -1034,7 +1034,7 @@ define <4 x float> @test_mm_i32gather_ps(float *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovaps %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast float *%a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
@@ -1050,12 +1050,12 @@ define <4 x float> @test_mm_mask_i32gather_ps(<4 x float> %a0, float *%a1, <2 x
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i32gather_ps:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast float *%a1 to i8*
 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
 %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %arg1, <4 x i32> %arg2, <4 x float> %a3, i8 2)
@@ -1070,7 +1070,7 @@ define <8 x float> @test_mm256_i32gather_ps(float *%a0, <4 x i64> %a1) {
 ; X86-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
 ; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1
 ; X86-NEXT: vmovaps %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i32gather_ps:
 ; X64: # %bb.0:
@@ -1078,7 +1078,7 @@ define <8 x float> @test_mm256_i32gather_ps(float *%a0, <4 x i64> %a1) {
 ; X64-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
 ; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1
 ; X64-NEXT: vmovaps %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast float *%a0 to i8*
 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
 %mask = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i8 0)
@@ -1092,12 +1092,12 @@ define <8 x float> @test_mm256_mask_i32gather_ps(<8 x float> %a0, float *%a1, <4
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i32gather_ps:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast float *%a1 to i8*
 %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
 %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %arg1, <8 x i32> %arg2, <8 x float> %a3, i8 2)
@@ -1112,7 +1112,7 @@ define <2 x i64> @test_mm_i64gather_epi32(i32 *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i64gather_epi32:
 ; X64: # %bb.0:
@@ -1120,7 +1120,7 @@ define <2 x i64> @test_mm_i64gather_epi32(i32 *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i32 *%a0 to i8*
 %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
 %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> undef, i8* %arg0, <2 x i64> %a1, <4 x i32> %mask, i8 2)
@@ -1134,12 +1134,12 @@ define <2 x i64> @test_mm_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i64gather_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast i32 *%a1 to i8*
 %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
@@ -1157,7 +1157,7 @@ define <2 x i64> @test_mm256_i64gather_epi32(i32 *%a0, <4 x i64> %a1) {
 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm0,2), %xmm1
 ; X86-NEXT: vmovdqa %xmm1, %xmm0
 ; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i64gather_epi32:
 ; X64: # %bb.0:
@@ -1166,7 +1166,7 @@ define <2 x i64> @test_mm256_i64gather_epi32(i32 *%a0, <4 x i64> %a1) {
 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm0,2), %xmm1
 ; X64-NEXT: vmovdqa %xmm1, %xmm0
 ; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i32 *%a0 to i8*
 %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
 %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8* %arg0, <4 x i64> %a1, <4 x i32> %mask, i8 2)
@@ -1181,13 +1181,13 @@ define <2 x i64> @test_mm256_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <4 x
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0
 ; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i64gather_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
 ; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast i32 *%a1 to i8*
 %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
@@ -1204,7 +1204,7 @@ define <2 x i64> @test_mm_i64gather_epi64(i64 *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovdqa %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i64gather_epi64:
 ; X64: # %bb.0:
@@ -1212,7 +1212,7 @@ define <2 x i64> @test_mm_i64gather_epi64(i64 *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovdqa %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64 *%a0 to i8*
 %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> undef, i8* %arg0, <2 x i64> %a1, <2 x i64> <i64 -1, i64 -1>, i8 2)
 ret <2 x i64> %call
@@ -1224,12 +1224,12 @@ define <2 x i64> @test_mm_mask_i64gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i64gather_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast i64 *%a1 to i8*
 %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %arg1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
 ret <2 x i64> %call
@@ -1243,7 +1243,7 @@ define <4 x i64> @test_mm256_i64gather_epi64(i64 *%a0, <4 x i64> %a1) {
 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1
 ; X86-NEXT: vmovdqa %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i64gather_epi64:
 ; X64: # %bb.0:
@@ -1251,7 +1251,7 @@ define <4 x i64> @test_mm256_i64gather_epi64(i64 *%a0, <4 x i64> %a1) {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1
 ; X64-NEXT: vmovdqa %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64 *%a0 to i8*
 %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8* %arg0, <4 x i64> %a1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
 ret <4 x i64> %call
@@ -1263,12 +1263,12 @@ define <4 x i64> @test_mm256_mask_i64gather_epi64(<4 x i64> %a0, i64 *%a1, <4 x
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i64gather_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast i64 *%a1 to i8*
 %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %arg1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
 ret <4 x i64> %call
@@ -1282,7 +1282,7 @@ define <2 x double> @test_mm_i64gather_pd(double *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovapd %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i64gather_pd:
 ; X64: # %bb.0:
@@ -1290,7 +1290,7 @@ define <2 x double> @test_mm_i64gather_pd(double *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovapd %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast double *%a0 to i8*
 %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
 %sext = sext <2 x i1> %cmp to <2 x i64>
@@ -1305,12 +1305,12 @@ define <2 x double> @test_mm_mask_i64gather_pd(<2 x double> %a0, double *%a1, <2
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i64gather_pd:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast double *%a1 to i8*
 %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %arg1, <2 x i64> %a2, <2 x double> %a3, i8 2)
 ret <2 x double> %call
@@ -1324,7 +1324,7 @@ define <4 x double> @test_mm256_i64gather_pd(double *%a0, <4 x i64> %a1) {
 ; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
 ; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1
 ; X86-NEXT: vmovapd %ymm1, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i64gather_pd:
 ; X64: # %bb.0:
@@ -1332,7 +1332,7 @@ define <4 x double> @test_mm256_i64gather_pd(double *%a0, <4 x i64> %a1) {
 ; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
 ; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1
 ; X64-NEXT: vmovapd %ymm1, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast double *%a0 to i8*
 %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
 %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %arg0, <4 x i64> %a1, <4 x double> %mask, i8 2)
@@ -1345,12 +1345,12 @@ define <4 x double> @test_mm256_mask_i64gather_pd(<4 x double> %a0, i64 *%a1, <4
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i64gather_pd:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast i64 *%a1 to i8*
 %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %arg1, <4 x i64> %a2, <4 x double> %a3, i8 2)
 ret <4 x double> %call
@@ -1364,7 +1364,7 @@ define <4 x float> @test_mm_i64gather_ps(float *%a0, <2 x i64> %a1) {
 ; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm0,2), %xmm1
 ; X86-NEXT: vmovaps %xmm1, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_i64gather_ps:
 ; X64: # %bb.0:
@@ -1372,7 +1372,7 @@ define <4 x float> @test_mm_i64gather_ps(float *%a0, <2 x i64> %a1) {
 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm0,2), %xmm1
 ; X64-NEXT: vmovaps %xmm1, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast float *%a0 to i8*
 %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
 %sext = sext <4 x i1> %cmp to <4 x i32>
@@ -1387,12 +1387,12 @@ define <4 x float> @test_mm_mask_i64gather_ps(<4 x float> %a0, float *%a1, <2 x
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_i64gather_ps:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast float *%a1 to i8*
 %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %arg1, <2 x i64> %a2, <4 x float> %a3, i8 2)
 ret <4 x float> %call
@@ -1407,7 +1407,7 @@ define <4 x float> @test_mm256_i64gather_ps(float *%a0, <4 x i64> %a1) {
 ; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm0,2), %xmm1
 ; X86-NEXT: vmovaps %xmm1, %xmm0
 ; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_i64gather_ps:
 ; X64: # %bb.0:
@@ -1416,7 +1416,7 @@ define <4 x float> @test_mm256_i64gather_ps(float *%a0, <4 x i64> %a1) {
 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm0,2), %xmm1
 ; X64-NEXT: vmovaps %xmm1, %xmm0
 ; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast float *%a0 to i8*
 %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
 %sext = sext <4 x i1> %cmp to <4 x i32>
@@ -1432,13 +1432,13 @@ define <4 x float> @test_mm256_mask_i64gather_ps(<4 x float> %a0, float *%a1, <4
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0
 ; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_i64gather_ps:
 ; X64: # %bb.0:
 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0
 ; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg1 = bitcast float *%a1 to i8*
 %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %arg1, <4 x i64> %a2, <4 x float> %a3, i8 2)
 ret <4 x float> %call
@@ -1496,12 +1496,12 @@ define <2 x i64> @test_mm_maskload_epi32(i32* %a0, <2 x i64> %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovd (%eax), %xmm0, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_maskload_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i32* %a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %call = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %arg0, <4 x i32> %arg1)
@@ -1515,12 +1515,12 @@ define <4 x i64> @test_mm256_maskload_epi32(i32* %a0, <4 x i64> %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovd (%eax), %ymm0, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskload_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i32* %a0 to i8*
 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
 %call = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %arg0, <8 x i32> %arg1)
@@ -1534,12 +1534,12 @@ define <2 x i64> @test_mm_maskload_epi64(i64* %a0, <2 x i64> %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovq (%eax), %xmm0, %xmm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_maskload_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64* %a0 to i8*
 %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %arg0, <2 x i64> %a1)
 ret <2 x i64> %res
@@ -1551,12 +1551,12 @@ define <4 x i64> @test_mm256_maskload_epi64(i64* %a0, <4 x i64> %a1) nounwind {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovq (%eax), %ymm0, %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskload_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64* %a0 to i8*
 %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %arg0, <4 x i64> %a1)
 ret <4 x i64> %res
@@ -1568,12 +1568,12 @@ define void @test_mm_maskstore_epi32(float* %a0, <2 x i64> %a1, <2 x i64> %a2) n
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovd %xmm1, %xmm0, (%eax)
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_maskstore_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast float* %a0 to i8*
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
@@ -1588,13 +1588,13 @@ define void @test_mm256_maskstore_epi32(float* %a0, <4 x i64> %a1, <4 x i64> %a2
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovd %ymm1, %ymm0, (%eax)
 ; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskstore_epi32:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
 ; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast float* %a0 to i8*
 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
 %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
@@ -1608,12 +1608,12 @@ define void @test_mm_maskstore_epi64(i64* %a0, <2 x i64> %a1, <2 x i64> %a2) nou
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovq %xmm1, %xmm0, (%eax)
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_maskstore_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi)
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64* %a0 to i8*
 call void @llvm.x86.avx2.maskstore.q(i8* %arg0, <2 x i64> %a1, <2 x i64> %a2)
 ret void
@@ -1626,13 +1626,13 @@ define void @test_mm256_maskstore_epi64(i64* %a0, <4 x i64> %a1, <4 x i64> %a2)
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpmaskmovq %ymm1, %ymm0, (%eax)
 ; X86-NEXT: vzeroupper
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskstore_epi64:
 ; X64: # %bb.0:
 ; X64-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi)
 ; X64-NEXT: vzeroupper
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast i64* %a0 to i8*
 call void @llvm.x86.avx2.maskstore.q.256(i8* %arg0, <4 x i64> %a1, <4 x i64> %a2)
 ret void
@@ -2465,12 +2465,12 @@ define <4 x i64> @test_mm256_stream_load_si256(<4 x i64> *%a0) {
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vmovntdqa (%eax), %ymm0
-; X86-NEXT: ret{{[l|q]}}
+; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_stream_load_si256:
 ; X64: # %bb.0:
 ; X64-NEXT: vmovntdqa (%rdi), %ymm0
-; X64-NEXT: ret{{[l|q]}}
+; X64-NEXT: retq
 %arg0 = bitcast <4 x i64> *%a0 to i8*
 %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %arg0)
 ret <4 x i64> %res