@@ -1290,6 +1290,111 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16
   ret <16 x i16> %res2
 }

+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
+; X86-LABEL: test_int_x86_avx512_mask_pavg_b_128:
+; X86:       # %bb.0:
+; X86-NEXT:    vpavgb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xd9]
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
+; X86-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_mask_pavg_b_128:
+; X64:       # %bb.0:
+; X64-NEXT:    vpavgb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xd9]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
+; X64-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %x0, <16 x i8> %x1)
+  %2 = bitcast i16 %x3 to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2
+  %4 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %x0, <16 x i8> %x1)
+  %res2 = add <16 x i8> %3, %4
+  ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+; X86-LABEL: test_int_x86_avx512_mask_pavg_b_256:
+; X86:       # %bb.0:
+; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xd9]
+; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
+; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_mask_pavg_b_256:
+; X64:       # %bb.0:
+; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xd9]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
+; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %1 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %x0, <32 x i8> %x1)
+  %2 = bitcast i32 %x3 to <32 x i1>
+  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2
+  %4 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %x0, <32 x i8> %x1)
+  %res2 = add <32 x i8> %3, %4
+  ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; X86-LABEL: test_int_x86_avx512_mask_pavg_w_128:
+; X86:       # %bb.0:
+; X86-NEXT:    vpavgw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xd9]
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
+; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_mask_pavg_w_128:
+; X64:       # %bb.0:
+; X64-NEXT:    vpavgw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xd9]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
+; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %x0, <8 x i16> %x1)
+  %2 = bitcast i8 %x3 to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
+  %4 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %x0, <8 x i16> %x1)
+  %res2 = add <8 x i16> %3, %4
+  ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; X86-LABEL: test_int_x86_avx512_mask_pavg_w_256:
+; X86:       # %bb.0:
+; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xd9]
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
+; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_int_x86_avx512_mask_pavg_w_256:
+; X64:       # %bb.0:
+; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xd9]
+; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
+; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
+; X64-NEXT:    retq # encoding: [0xc3]
+  %1 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %x0, <16 x i16> %x1)
+  %2 = bitcast i16 %x3 to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
+  %4 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %x0, <16 x i16> %x1)
+  %res2 = add <16 x i16> %3, %4
+  ret <16 x i16> %res2
+}
+
 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>)

 define <8 x i16> @test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {