[ARM] Additional SSAT/USAT tests for min/max patterns. NFC

David Green 2022-02-21 16:24:58 +00:00
parent 52577cd26f
commit 4d5b020d6e
3 changed files with 896 additions and 0 deletions
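
For context before the diffs: SSAT and USAT are Arm DSP instructions that clamp a value to a signed or unsigned power-of-two range in a single operation. The tests added here spell the clamps out with the llvm.smin/llvm.smax (and llvm.umin) intrinsics to check how such patterns currently lower. A minimal sketch of the signed pattern, separate from the committed tests (the name @ssat_pattern is illustrative):

define i32 @ssat_pattern(i32 %x) {
entry:
  ; clamp %x to the 24-bit signed range [-8388608, 8388607]; on a
  ; DSP-enabled target this is exactly what "ssat r0, #24, r0" computes
  %lo = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
  %hi = call i32 @llvm.smin.i32(i32 %lo, i32 8388607)
  ret i32 %hi
}
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)

Since the commit is NFC (tests only), the CHECK lines below record the current cmp/mov lowering rather than ssat/usat.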

@@ -121,3 +121,139 @@ while.body: ; preds = %while.body.prol.loo
while.end: ; preds = %while.body, %while.body.prol.loopexit, %entry
ret void
}
define void @ssat_unroll_minmax(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i16* nocapture writeonly %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll_minmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB1_6
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: movw r12, #32768
; CHECK-NEXT: sub lr, r3, #1
; CHECK-NEXT: tst r3, #1
; CHECK-NEXT: movt r12, #65535
; CHECK-NEXT: beq .LBB1_3
; CHECK-NEXT: @ %bb.2: @ %while.body.prol.preheader
; CHECK-NEXT: ldrsh r3, [r0], #2
; CHECK-NEXT: ldrsh r4, [r1], #2
; CHECK-NEXT: smulbb r3, r4, r3
; CHECK-NEXT: asr r4, r3, #14
; CHECK-NEXT: cmn r4, #32768
; CHECK-NEXT: mov r4, r12
; CHECK-NEXT: asrgt r4, r3, #14
; CHECK-NEXT: movw r3, #32767
; CHECK-NEXT: cmp r4, r3
; CHECK-NEXT: movge r4, r3
; CHECK-NEXT: mov r3, lr
; CHECK-NEXT: strh r4, [r2], #2
; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp lr, #0
; CHECK-NEXT: beq .LBB1_6
; CHECK-NEXT: @ %bb.4: @ %while.body.preheader1
; CHECK-NEXT: movw lr, #32767
; CHECK-NEXT: .LBB1_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh r4, [r0]
; CHECK-NEXT: ldrsh r5, [r1]
; CHECK-NEXT: smulbb r4, r5, r4
; CHECK-NEXT: asr r5, r4, #14
; CHECK-NEXT: cmn r5, #32768
; CHECK-NEXT: mov r5, r12
; CHECK-NEXT: asrgt r5, r4, #14
; CHECK-NEXT: cmp r5, lr
; CHECK-NEXT: movge r5, lr
; CHECK-NEXT: strh r5, [r2]
; CHECK-NEXT: ldrsh r4, [r0, #2]
; CHECK-NEXT: add r0, r0, #4
; CHECK-NEXT: ldrsh r5, [r1, #2]
; CHECK-NEXT: add r1, r1, #4
; CHECK-NEXT: smulbb r4, r5, r4
; CHECK-NEXT: asr r5, r4, #14
; CHECK-NEXT: cmn r5, #32768
; CHECK-NEXT: mov r5, r12
; CHECK-NEXT: asrgt r5, r4, #14
; CHECK-NEXT: cmp r5, lr
; CHECK-NEXT: movge r5, lr
; CHECK-NEXT: subs r3, r3, #2
; CHECK-NEXT: strh r5, [r2, #2]
; CHECK-NEXT: add r2, r2, #4
; CHECK-NEXT: bne .LBB1_5
; CHECK-NEXT: .LBB1_6: @ %while.end
; CHECK-NEXT: pop {r4, r5, r11, pc}
entry:
%cmp.not7 = icmp eq i32 %blockSize, 0
br i1 %cmp.not7, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
%0 = add i32 %blockSize, -1
%xtraiter = and i32 %blockSize, 1
%lcmp.mod.not = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader
while.body.prol.preheader: ; preds = %while.body.preheader
%incdec.ptr.prol = getelementptr inbounds i16, i16* %pSrcA, i64 1
%1 = load i16, i16* %pSrcA, align 2
%conv.prol = sext i16 %1 to i32
%incdec.ptr1.prol = getelementptr inbounds i16, i16* %pSrcB, i64 1
%2 = load i16, i16* %pSrcB, align 2
%conv2.prol = sext i16 %2 to i32
%mul.prol = mul nsw i32 %conv2.prol, %conv.prol
%shr.prol = ashr i32 %mul.prol, 14
%3 = call i32 @llvm.smax.i32(i32 %shr.prol, i32 -32768)
%4 = call i32 @llvm.smin.i32(i32 %3, i32 32767)
%conv3.prol = trunc i32 %4 to i16
%incdec.ptr4.prol = getelementptr inbounds i16, i16* %pDst, i64 1
store i16 %conv3.prol, i16* %pDst, align 2
br label %while.body.prol.loopexit
while.body.prol.loopexit: ; preds = %while.body.prol.preheader, %while.body.preheader
%blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ]
%pSrcA.addr.010.unr = phi i16* [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ]
%pDst.addr.09.unr = phi i16* [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ]
%pSrcB.addr.08.unr = phi i16* [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ]
%5 = icmp eq i32 %0, 0
br i1 %5, label %while.end, label %while.body
while.body: ; preds = %while.body.prol.loopexit, %while.body
%blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ]
%pSrcA.addr.010 = phi i16* [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ]
%pDst.addr.09 = phi i16* [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ]
%pSrcB.addr.08 = phi i16* [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ]
%incdec.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.010, i64 1
%6 = load i16, i16* %pSrcA.addr.010, align 2
%conv = sext i16 %6 to i32
%incdec.ptr1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i64 1
%7 = load i16, i16* %pSrcB.addr.08, align 2
%conv2 = sext i16 %7 to i32
%mul = mul nsw i32 %conv2, %conv
%shr = ashr i32 %mul, 14
%8 = call i32 @llvm.smax.i32(i32 %shr, i32 -32768)
%9 = call i32 @llvm.smin.i32(i32 %8, i32 32767)
%conv3 = trunc i32 %9 to i16
%incdec.ptr4 = getelementptr inbounds i16, i16* %pDst.addr.09, i64 1
store i16 %conv3, i16* %pDst.addr.09, align 2
%incdec.ptr.1 = getelementptr inbounds i16, i16* %pSrcA.addr.010, i64 2
%10 = load i16, i16* %incdec.ptr, align 2
%conv.1 = sext i16 %10 to i32
%incdec.ptr1.1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i64 2
%11 = load i16, i16* %incdec.ptr1, align 2
%conv2.1 = sext i16 %11 to i32
%mul.1 = mul nsw i32 %conv2.1, %conv.1
%shr.1 = ashr i32 %mul.1, 14
%12 = call i32 @llvm.smax.i32(i32 %shr.1, i32 -32768)
%13 = call i32 @llvm.smin.i32(i32 %12, i32 32767)
%conv3.1 = trunc i32 %13 to i16
%incdec.ptr4.1 = getelementptr inbounds i16, i16* %pDst.addr.09, i64 2
store i16 %conv3.1, i16* %incdec.ptr4, align 2
%dec.1 = add i32 %blkCnt.011, -2
%cmp.not.1 = icmp eq i32 %dec.1, 0
br i1 %cmp.not.1, label %while.end, label %while.body
while.end: ; preds = %while.body, %while.body.prol.loopexit, %entry
ret void
}
declare i32 @llvm.smax.i32(i32, i32) #1
declare i32 @llvm.smin.i32(i32, i32) #1
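
To make the clamp in @ssat_unroll_minmax above concrete, one worked evaluation (values chosen here for illustration, not taken from the test): with both i16 inputs at 32767,

  %mul = 32767 * 32767    = 1073676289
  %shr = %mul ashr 14     = 65532
  smax(%shr, -32768)      = 65532
  smin(65532, 32767)      = 32767   ; clamp fires, result stored as i16

This is why the generated asm materializes the 32767 and -32768 constants around each multiply; a single "ssat r4, #16, r3, asr #14" could in principle express the same shift-and-clamp.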

@@ -649,3 +649,339 @@ define i32 @formulated_invalid(i32 %a) {
%r = and i32 %s2, 16777215
ret i32 %r
}
define i32 @mm_sat_base_32bit(i32 %x) {
; V4T-LABEL: mm_sat_base_32bit:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI18_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: movle r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI18_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_sat_base_32bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: movw r1, #0
; V6T2-NEXT: movt r1, #65408
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
%1 = call i32 @llvm.smax.i32(i32 %0, i32 -8388608)
ret i32 %1
}
define i16 @mm_sat_base_16bit(i16 %x) {
; V4T-LABEL: mm_sat_base_16bit:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: mov r2, #255
; V4T-NEXT: lsl r0, r0, #16
; V4T-NEXT: orr r2, r2, #1792
; V4T-NEXT: asr r1, r0, #16
; V4T-NEXT: cmp r1, r2
; V4T-NEXT: asrlt r2, r0, #16
; V4T-NEXT: ldr r0, .LCPI19_0
; V4T-NEXT: cmn r2, #2048
; V4T-NEXT: movgt r0, r2
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI19_0:
; V4T-NEXT: .long 4294965248 @ 0xfffff800
;
; V6T2-LABEL: mm_sat_base_16bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxth r0, r0
; V6T2-NEXT: movw r1, #2047
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movlt r1, r0
; V6T2-NEXT: movw r0, #63488
; V6T2-NEXT: movt r0, #65535
; V6T2-NEXT: cmn r1, #2048
; V6T2-NEXT: movgt r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i16 @llvm.smin.i16(i16 %x, i16 2047)
%1 = call i16 @llvm.smax.i16(i16 %0, i16 -2048)
ret i16 %1
}
define i8 @mm_sat_base_8bit(i8 %x) {
; V4T-LABEL: mm_sat_base_8bit:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: lsl r1, r0, #24
; V4T-NEXT: mov r0, #31
; V4T-NEXT: asr r2, r1, #24
; V4T-NEXT: cmp r2, #31
; V4T-NEXT: asrlt r0, r1, #24
; V4T-NEXT: cmn r0, #32
; V4T-NEXT: mvnle r0, #31
; V4T-NEXT: bx lr
;
; V6T2-LABEL: mm_sat_base_8bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxtb r0, r0
; V6T2-NEXT: cmp r0, #31
; V6T2-NEXT: movge r0, #31
; V6T2-NEXT: cmn r0, #32
; V6T2-NEXT: mvnle r0, #31
; V6T2-NEXT: bx lr
entry:
%0 = call i8 @llvm.smin.i8(i8 %x, i8 31)
%1 = call i8 @llvm.smax.i8(i8 %0, i8 -32)
ret i8 %1
}
define i32 @mm_sat_lower_upper_1(i32 %x) {
; V4T-LABEL: mm_sat_lower_upper_1:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI21_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: movle r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI21_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_sat_lower_upper_1:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: movw r1, #0
; V6T2-NEXT: movt r1, #65408
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
%1 = call i32 @llvm.smax.i32(i32 %0, i32 -8388608)
ret i32 %1
}
define i32 @mm_sat_lower_upper_2(i32 %x) {
; V4T-LABEL: mm_sat_lower_upper_2:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI22_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: movle r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI22_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_sat_lower_upper_2:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: movw r1, #0
; V6T2-NEXT: movt r1, #65408
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
%1 = call i32 @llvm.smax.i32(i32 %0, i32 -8388608)
ret i32 %1
}
define i32 @mm_sat_upper_lower_1(i32 %x) {
; V4T-LABEL: mm_sat_upper_lower_1:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: movle r0, r1
; V4T-NEXT: ldr r1, .LCPI23_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI23_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_sat_upper_lower_1:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #0
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: movt r1, #65408
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
%1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_sat_upper_lower_2(i32 %x) {
; V4T-LABEL: mm_sat_upper_lower_2:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: movle r0, r1
; V4T-NEXT: ldr r1, .LCPI24_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI24_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_sat_upper_lower_2:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #0
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: movt r1, #65408
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
%1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_sat_upper_lower_3(i32 %x) {
; V4T-LABEL: mm_sat_upper_lower_3:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: movle r0, r1
; V4T-NEXT: ldr r1, .LCPI25_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI25_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_sat_upper_lower_3:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #0
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: movt r1, #65408
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
%1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_sat_le_ge(i32 %x) {
; V4T-LABEL: mm_sat_le_ge:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: movle r0, r1
; V4T-NEXT: ldr r1, .LCPI26_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI26_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_sat_le_ge:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #0
; V6T2-NEXT: cmn r0, #8388608
; V6T2-NEXT: movt r1, #65408
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
%1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_no_sat_incorrect_interval(i32 %x) {
; V4T-LABEL: mm_no_sat_incorrect_interval:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI27_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movle r0, r1
; V4T-NEXT: ldr r1, .LCPI27_1
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI27_0:
; V4T-NEXT: .long 4275878552 @ 0xfedcba98
; V4T-NEXT: .LCPI27_1:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_no_sat_incorrect_interval:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #47768
; V6T2-NEXT: movt r1, #65244
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movle r0, r1
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -19088744)
%1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
ret i32 %1
}
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i16 @llvm.smin.i16(i16, i16)
declare i16 @llvm.smax.i16(i16, i16)
declare i8 @llvm.smin.i8(i8, i8)
declare i8 @llvm.smax.i8(i8, i8)
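
The third file below covers the unsigned variants: a clamp of the form smax(x, 0) followed by umin with 2^23-1 matches what "usat r0, #23, r0" computes. A minimal sketch, again separate from the committed tests (the name @usat_pattern is illustrative):

define i32 @usat_pattern(i32 %x) {
entry:
  ; clamp %x to the unsigned 23-bit range [0, 8388607]; on a
  ; DSP-enabled target this corresponds to "usat r0, #23, r0"
  %nonneg = call i32 @llvm.smax.i32(i32 %x, i32 0)
  %clamped = call i32 @llvm.umin.i32(i32 %nonneg, i32 8388607)
  ret i32 %clamped
}
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)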

@@ -608,3 +608,427 @@ entry:
%saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
ret i32 %saturateUp
}
define i32 @mm_unsigned_sat_base_32bit(i32 %x) {
; V4T-LABEL: mm_unsigned_sat_base_32bit:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI15_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movlt r1, r0
; V4T-NEXT: bic r0, r1, r1, asr #31
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI15_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_unsigned_sat_base_32bit:
; V6: @ %bb.0: @ %entry
; V6-NEXT: ldr r1, .LCPI15_0
; V6-NEXT: cmp r0, r1
; V6-NEXT: movlt r1, r0
; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI15_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_base_32bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movlt r1, r0
; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
%1 = call i32 @llvm.smax.i32(i32 %0, i32 0)
ret i32 %1
}
define i16 @mm_unsigned_sat_base_16bit(i16 %x) {
; V4T-LABEL: mm_unsigned_sat_base_16bit:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: mov r2, #255
; V4T-NEXT: lsl r0, r0, #16
; V4T-NEXT: orr r2, r2, #1792
; V4T-NEXT: asr r1, r0, #16
; V4T-NEXT: cmp r1, r2
; V4T-NEXT: asrlt r2, r0, #16
; V4T-NEXT: bic r0, r2, r2, asr #31
; V4T-NEXT: bx lr
;
; V6-LABEL: mm_unsigned_sat_base_16bit:
; V6: @ %bb.0: @ %entry
; V6-NEXT: mov r1, #255
; V6-NEXT: sxth r0, r0
; V6-NEXT: orr r1, r1, #1792
; V6-NEXT: cmp r0, r1
; V6-NEXT: movlt r1, r0
; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
;
; V6T2-LABEL: mm_unsigned_sat_base_16bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxth r0, r0
; V6T2-NEXT: movw r1, #2047
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movlt r1, r0
; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = call i16 @llvm.smin.i16(i16 %x, i16 2047)
%1 = call i16 @llvm.smax.i16(i16 %0, i16 0)
ret i16 %1
}
define i8 @mm_unsigned_sat_base_8bit(i8 %x) {
; V4T-LABEL: mm_unsigned_sat_base_8bit:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: lsl r0, r0, #24
; V4T-NEXT: mov r2, #31
; V4T-NEXT: asr r1, r0, #24
; V4T-NEXT: cmp r1, #31
; V4T-NEXT: asrlt r2, r0, #24
; V4T-NEXT: bic r0, r2, r2, asr #31
; V4T-NEXT: bx lr
;
; V6-LABEL: mm_unsigned_sat_base_8bit:
; V6: @ %bb.0: @ %entry
; V6-NEXT: sxtb r0, r0
; V6-NEXT: cmp r0, #31
; V6-NEXT: movge r0, #31
; V6-NEXT: bic r0, r0, r0, asr #31
; V6-NEXT: bx lr
;
; V6T2-LABEL: mm_unsigned_sat_base_8bit:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: sxtb r0, r0
; V6T2-NEXT: cmp r0, #31
; V6T2-NEXT: movge r0, #31
; V6T2-NEXT: bic r0, r0, r0, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = call i8 @llvm.smin.i8(i8 %x, i8 31)
%1 = call i8 @llvm.smax.i8(i8 %0, i8 0)
ret i8 %1
}
define i32 @mm_unsigned_sat_lower_upper_1(i32 %x) {
; V4T-LABEL: mm_unsigned_sat_lower_upper_1:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI18_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movlt r1, r0
; V4T-NEXT: bic r0, r1, r1, asr #31
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI18_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_unsigned_sat_lower_upper_1:
; V6: @ %bb.0: @ %entry
; V6-NEXT: ldr r1, .LCPI18_0
; V6-NEXT: cmp r0, r1
; V6-NEXT: movlt r1, r0
; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI18_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_lower_upper_1:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movlt r1, r0
; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
%1 = call i32 @llvm.smax.i32(i32 %0, i32 0)
ret i32 %1
}
define i32 @mm_unsigned_sat_lower_upper_2(i32 %x) {
; V4T-LABEL: mm_unsigned_sat_lower_upper_2:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI19_0
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movlt r1, r0
; V4T-NEXT: bic r0, r1, r1, asr #31
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI19_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_unsigned_sat_lower_upper_2:
; V6: @ %bb.0: @ %entry
; V6-NEXT: ldr r1, .LCPI19_0
; V6-NEXT: cmp r0, r1
; V6-NEXT: movlt r1, r0
; V6-NEXT: bic r0, r1, r1, asr #31
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI19_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_lower_upper_2:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movlt r1, r0
; V6T2-NEXT: bic r0, r1, r1, asr #31
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
%1 = call i32 @llvm.smax.i32(i32 %0, i32 0)
ret i32 %1
}
define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
; V4T-LABEL: mm_unsigned_sat_upper_lower_1:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI20_0
; V4T-NEXT: cmp r1, r0
; V4T-NEXT: movlo r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI20_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_unsigned_sat_upper_lower_1:
; V6: @ %bb.0: @ %entry
; V6-NEXT: bic r1, r0, r0, asr #31
; V6-NEXT: ldr r0, .LCPI20_0
; V6-NEXT: cmp r1, r0
; V6-NEXT: movlo r0, r1
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI20_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_1:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: bic r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #65535
; V6T2-NEXT: movt r0, #127
; V6T2-NEXT: cmp r1, r0
; V6T2-NEXT: movlo r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
%1 = call i32 @llvm.umin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
; V4T-LABEL: mm_unsigned_sat_upper_lower_2:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI21_0
; V4T-NEXT: cmp r1, r0
; V4T-NEXT: movlo r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI21_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_unsigned_sat_upper_lower_2:
; V6: @ %bb.0: @ %entry
; V6-NEXT: bic r1, r0, r0, asr #31
; V6-NEXT: ldr r0, .LCPI21_0
; V6-NEXT: cmp r1, r0
; V6-NEXT: movlo r0, r1
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI21_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_2:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: bic r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #65535
; V6T2-NEXT: movt r0, #127
; V6T2-NEXT: cmp r1, r0
; V6T2-NEXT: movlo r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
%1 = call i32 @llvm.umin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
; V4T-LABEL: mm_unsigned_sat_upper_lower_3:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI22_0
; V4T-NEXT: cmp r1, r0
; V4T-NEXT: movlo r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI22_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_unsigned_sat_upper_lower_3:
; V6: @ %bb.0: @ %entry
; V6-NEXT: bic r1, r0, r0, asr #31
; V6-NEXT: ldr r0, .LCPI22_0
; V6-NEXT: cmp r1, r0
; V6-NEXT: movlo r0, r1
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI22_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_unsigned_sat_upper_lower_3:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: bic r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #65535
; V6T2-NEXT: movt r0, #127
; V6T2-NEXT: cmp r1, r0
; V6T2-NEXT: movlo r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
%1 = call i32 @llvm.umin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_no_unsigned_sat_incorrect_constant(i32 %x) {
; V4T-LABEL: mm_no_unsigned_sat_incorrect_constant:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: orr r1, r0, r0, asr #31
; V4T-NEXT: ldr r0, .LCPI23_0
; V4T-NEXT: cmp r1, r0
; V4T-NEXT: movlt r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI23_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_no_unsigned_sat_incorrect_constant:
; V6: @ %bb.0: @ %entry
; V6-NEXT: orr r1, r0, r0, asr #31
; V6-NEXT: ldr r0, .LCPI23_0
; V6-NEXT: cmp r1, r0
; V6-NEXT: movlt r0, r1
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI23_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: orr r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #65535
; V6T2-NEXT: movt r0, #127
; V6T2-NEXT: cmp r1, r0
; V6T2-NEXT: movlt r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -1)
%1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
ret i32 %1
}
define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
; V4T-LABEL: mm_no_unsigned_sat_incorrect_constant2:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: bic r1, r0, r0, asr #31
; V4T-NEXT: mov r0, #1
; V4T-NEXT: orr r0, r0, #8388608
; V4T-NEXT: cmp r1, #8388608
; V4T-NEXT: movls r0, r1
; V4T-NEXT: bx lr
;
; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2:
; V6: @ %bb.0: @ %entry
; V6-NEXT: bic r1, r0, r0, asr #31
; V6-NEXT: mov r0, #1
; V6-NEXT: orr r0, r0, #8388608
; V6-NEXT: cmp r1, #8388608
; V6-NEXT: movls r0, r1
; V6-NEXT: bx lr
;
; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: bic r1, r0, r0, asr #31
; V6T2-NEXT: movw r0, #1
; V6T2-NEXT: movt r0, #128
; V6T2-NEXT: cmp r1, #8388608
; V6T2-NEXT: movls r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
%1 = call i32 @llvm.umin.i32(i32 %0, i32 8388609)
ret i32 %1
}
define i32 @mm_no_unsigned_sat_incorrect_interval(i32 %x) {
; V4T-LABEL: mm_no_unsigned_sat_incorrect_interval:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r1, .LCPI25_0
; V4T-NEXT: cmn r0, #4
; V4T-NEXT: mvnle r0, #3
; V4T-NEXT: cmp r0, r1
; V4T-NEXT: movge r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
; V4T-NEXT: @ %bb.1:
; V4T-NEXT: .LCPI25_0:
; V4T-NEXT: .long 8388607 @ 0x7fffff
;
; V6-LABEL: mm_no_unsigned_sat_incorrect_interval:
; V6: @ %bb.0: @ %entry
; V6-NEXT: ldr r1, .LCPI25_0
; V6-NEXT: cmn r0, #4
; V6-NEXT: mvnle r0, #3
; V6-NEXT: cmp r0, r1
; V6-NEXT: movge r0, r1
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
; V6-NEXT: @ %bb.1:
; V6-NEXT: .LCPI25_0:
; V6-NEXT: .long 8388607 @ 0x7fffff
;
; V6T2-LABEL: mm_no_unsigned_sat_incorrect_interval:
; V6T2: @ %bb.0: @ %entry
; V6T2-NEXT: cmn r0, #4
; V6T2-NEXT: movw r1, #65535
; V6T2-NEXT: mvnle r0, #3
; V6T2-NEXT: movt r1, #127
; V6T2-NEXT: cmp r0, r1
; V6T2-NEXT: movge r0, r1
; V6T2-NEXT: bx lr
entry:
%0 = call i32 @llvm.smax.i32(i32 %x, i32 -4)
%1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
ret i32 %1
}
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i16 @llvm.smin.i16(i16, i16)
declare i16 @llvm.smax.i16(i16, i16)
declare i8 @llvm.smin.i8(i8, i8)
declare i8 @llvm.smax.i8(i8, i8)
declare i32 @llvm.umin.i32(i32, i32)
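
A quick sanity check on the negative tests (arithmetic added here for illustration, not part of the commit): the saturating instructions only cover exact power-of-two ranges, which is what the mm_no_* functions deliberately violate.

  ssat #24 covers [-2^23, 2^23-1] = [-8388608, 8388607]   ; the foldable signed tests
  usat #23 covers [0, 2^23-1]     = [0, 8388607]          ; the foldable unsigned tests
  [-19088744, 8388607]  is not [-2^(n-1), 2^(n-1)-1]      ; mm_no_sat_incorrect_interval
  [-1, 8388607]         has a nonzero lower bound         ; mm_no_unsigned_sat_incorrect_constant
  [0, 8388609]          is not [0, 2^n-1]                 ; mm_no_unsigned_sat_incorrect_constant2

In each of these cases the CHECK lines accordingly keep the plain cmp/mov sequences.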