[X86] Limit X86InterleavedAccessGroup to handle the same type case only
The current implementation assumes the destination type of the shuffle is the same as the decomposed ones. Add the check to avoid a crash when the condition is not satisfied. This fixes PR37616. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D102751
This commit is contained in:
parent
81b2fcf26f
commit
ca23a38e37
|
@ -733,6 +733,9 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
|
|||
// results by generating some sort of (optimized) target-specific
|
||||
// instructions.
|
||||
|
||||
if (ShuffleTy->getNumElements() != NumSubVecElems)
|
||||
return false;
|
||||
|
||||
switch (NumSubVecElems) {
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -1930,3 +1930,22 @@ define void @splat4_v4i64_load_store(<4 x i64>* %s, <16 x i64>* %d) {
|
|||
store <16 x i64> %r, <16 x i64>* %d, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Regression test for PR37616: loads a <16 x i64> vector and uses a shufflevector with a two-element mask (indices 2 and 6) so the shuffle result type (<2 x i64>) differs from the loaded vector type.
define <2 x i64> @PR37616(<16 x i64>* %a0) {
|
||||
; AVX1-LABEL: PR37616:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovaps 16(%rdi), %xmm0
|
||||
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2OR512-LABEL: PR37616:
|
||||
; AVX2OR512: # %bb.0:
|
||||
; AVX2OR512-NEXT: vmovaps (%rdi), %ymm0
|
||||
; AVX2OR512-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
|
||||
; AVX2OR512-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX2OR512-NEXT: vzeroupper
|
||||
; AVX2OR512-NEXT: retq
|
||||
%load = load <16 x i64>, <16 x i64>* %a0, align 128
|
||||
%shuffle = shufflevector <16 x i64> %load, <16 x i64> undef, <2 x i32> <i32 2, i32 6>
|
||||
ret <2 x i64> %shuffle
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue