[ExpandMemCmp] Properly expand bcmp to an equality pattern.
Before this change, a constant-size `bcmp` would miss the opportunity to generate the more efficient equality pattern and would generate a -1/0/1 (memcmp-style) pattern instead. Differential Revision: https://reviews.llvm.org/D123849
This commit is contained in:
parent
866bd4df47
commit
46a13a0ef8
|
@ -740,7 +740,7 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
|
|||
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
|
||||
const TargetLowering *TLI, const DataLayout *DL,
|
||||
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
|
||||
DomTreeUpdater *DTU) {
|
||||
DomTreeUpdater *DTU, const bool IsBCmp) {
|
||||
NumMemCmpCalls++;
|
||||
|
||||
// Early exit from expansion if -Oz.
|
||||
|
@ -760,7 +760,8 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
|
|||
}
|
||||
// TTI call to check if target would like to expand memcmp. Also, get the
|
||||
// available load sizes.
|
||||
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
|
||||
const bool IsUsedForZeroCmp =
|
||||
IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
|
||||
bool OptForSize = CI->getFunction()->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
|
||||
auto Options = TTI->enableMemCmpExpansion(OptForSize,
|
||||
|
@ -864,7 +865,7 @@ bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
|
|||
LibFunc Func;
|
||||
if (TLI->getLibFunc(*CI, Func) &&
|
||||
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
|
||||
expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
|
||||
expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -569,15 +569,12 @@ define i1 @length64_eq_const(i8* %X) nounwind optsize {
|
|||
define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind optsize {
|
||||
; X86-LABEL: bcmp_length2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movzwl (%ecx), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movzwl (%eax), %edx
|
||||
; X86-NEXT: rolw $8, %cx
|
||||
; X86-NEXT: rolw $8, %dx
|
||||
; X86-NEXT: movzwl %cx, %eax
|
||||
; X86-NEXT: movzwl %dx, %ecx
|
||||
; X86-NEXT: subl %ecx, %eax
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: cmpw (%ecx), %dx
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: retl
|
||||
%m = tail call i32 @bcmp(i8* %X, i8* %Y, i32 2) nounwind
|
||||
ret i32 %m
|
||||
|
|
|
@ -584,13 +584,10 @@ define i1 @length64_eq_const(i8* %X) nounwind optsize {
|
|||
define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind optsize {
|
||||
; X64-LABEL: bcmp_length2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movzwl (%rdi), %eax
|
||||
; X64-NEXT: movzwl (%rsi), %ecx
|
||||
; X64-NEXT: rolw $8, %ax
|
||||
; X64-NEXT: rolw $8, %cx
|
||||
; X64-NEXT: movzwl %ax, %eax
|
||||
; X64-NEXT: movzwl %cx, %ecx
|
||||
; X64-NEXT: subl %ecx, %eax
|
||||
; X64-NEXT: movzwl (%rdi), %ecx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: cmpw (%rsi), %cx
|
||||
; X64-NEXT: setne %al
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
|
||||
ret i32 %m
|
||||
|
|
|
@ -569,15 +569,12 @@ define i1 @length64_eq_const(i8* %X) nounwind !prof !14 {
|
|||
define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {
|
||||
; X86-LABEL: bcmp_length2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movzwl (%ecx), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movzwl (%eax), %edx
|
||||
; X86-NEXT: rolw $8, %cx
|
||||
; X86-NEXT: rolw $8, %dx
|
||||
; X86-NEXT: movzwl %cx, %eax
|
||||
; X86-NEXT: movzwl %dx, %ecx
|
||||
; X86-NEXT: subl %ecx, %eax
|
||||
; X86-NEXT: xorl %eax, %eax
|
||||
; X86-NEXT: cmpw (%ecx), %dx
|
||||
; X86-NEXT: setne %al
|
||||
; X86-NEXT: retl
|
||||
%m = tail call i32 @bcmp(i8* %X, i8* %Y, i32 2) nounwind
|
||||
ret i32 %m
|
||||
|
|
|
@ -584,13 +584,10 @@ define i1 @length64_eq_const(i8* %X) nounwind !prof !14 {
|
|||
define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {
|
||||
; X64-LABEL: bcmp_length2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movzwl (%rdi), %eax
|
||||
; X64-NEXT: movzwl (%rsi), %ecx
|
||||
; X64-NEXT: rolw $8, %ax
|
||||
; X64-NEXT: rolw $8, %cx
|
||||
; X64-NEXT: movzwl %ax, %eax
|
||||
; X64-NEXT: movzwl %cx, %ecx
|
||||
; X64-NEXT: subl %ecx, %eax
|
||||
; X64-NEXT: movzwl (%rdi), %ecx
|
||||
; X64-NEXT: xorl %eax, %eax
|
||||
; X64-NEXT: cmpw (%rsi), %cx
|
||||
; X64-NEXT: setne %al
|
||||
; X64-NEXT: retq
|
||||
%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
|
||||
ret i32 %m
|
||||
|
|
|
@ -9,14 +9,9 @@ define i32 @bcmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
|
|||
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
|
||||
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
|
||||
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
|
||||
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
|
||||
; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
|
||||
; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
|
||||
; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
|
||||
; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
|
||||
; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
|
||||
; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
|
||||
; X64-NEXT: ret i32 [[TMP11]]
|
||||
; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
|
||||
; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
|
||||
; X64-NEXT: ret i32 [[TMP6]]
|
||||
;
|
||||
%call = tail call i32 @bcmp(i8* %x, i8* %y, i64 8)
|
||||
ret i32 %call
|
||||
|
|
Loading…
Reference in a new issue