[DSE,MSSA] Port partial store merging.

Port the partial constant store merging logic to the MemorySSA-backed DSE. The
heavy lifting is done by the existing helper function. It is used in a
context where we have already ensured that the later instruction can
eliminate the earlier one, if it is a complete overwrite.
This commit is contained in:
Florian Hahn 2020-06-15 15:40:07 +01:00
parent 32c757e4f8
commit 120c059292
6 changed files with 63 additions and 49 deletions

View file

@ -2064,6 +2064,28 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset,
InstWriteOffset, NI, IOL, AA, &F);
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
auto *Earlier = dyn_cast<StoreInst>(NI);
auto *Later = dyn_cast<StoreInst>(SI);
if (Constant *Merged = tryToMergePartialOverlappingStores(
Earlier, Later, InstWriteOffset, DepWriteOffset, DL, &AA,
&DT)) {
// Update stored value of earlier store to merged constant.
Earlier->setOperand(0, Merged);
++NumModifiedStores;
MadeChange = true;
// Remove later store and remove any outstanding overlap intervals for
// the updated store.
State.deleteDeadInstruction(Later);
auto I = State.IOLs.find(Earlier->getParent());
if (I != State.IOLs.end())
I->second.erase(Earlier);
break;
}
}
ToCheck.insert(NextDef->getDefiningAccess());
if (OR == OW_Complete) {
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI

View file

@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; XFAIL: *
; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"

View file

@ -6,15 +6,15 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
define void @test4(i32* noalias %P) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: store i32 0, i32* [[P:%.*]]
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]]
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: store i32 0, i32* [[P]]
; CHECK-NEXT: store i32 0, i32* [[P]], align 4
; CHECK-NEXT: ret void
;
store i32 0, i32* %P
@ -37,7 +37,7 @@ define void @test5(i32* noalias %P) {
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: store i32 0, i32* [[P:%.*]]
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
br i1 true, label %bb1, label %bb2
@ -58,10 +58,10 @@ define void @test8(i32* %P, i32* %Q) {
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: store i32 1, i32* [[Q:%.*]]
; CHECK-NEXT: store i32 1, i32* [[Q:%.*]], align 4
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: store i32 0, i32* [[P:%.*]]
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
br i1 true, label %bb1, label %bb2
@ -78,15 +78,13 @@ bb3:
define void @test10(i32* noalias %P) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT: store i32 0, i32* [[P]]
; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: store i8 1, i8* [[P2]]
; CHECK-NEXT: ret void
;
%P2 = bitcast i32* %P to i8*

View file

@ -5,15 +5,13 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
define void @second_store_smaller_1(i32* noalias %P, i1 %c) {
; CHECK-LABEL: @second_store_smaller_1(
; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[P_I16:%.*]] = bitcast i32* [[P]] to i16*
; CHECK-NEXT: store i16 0, i16* [[P_I16]], align 2
; CHECK-NEXT: ret void
;
store i32 1, i32* %P
@ -30,15 +28,13 @@ bb3:
define void @second_store_smaller_2(i32* noalias %P, i1 %c) {
; CHECK-LABEL: @second_store_smaller_2(
; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4
; CHECK-NEXT: store i32 12345, i32* [[P:%.*]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[P_I16:%.*]] = bitcast i32* [[P]] to i16*
; CHECK-NEXT: store i16 12345, i16* [[P_I16]], align 2
; CHECK-NEXT: ret void
;
store i32 1, i32* %P

View file

@ -10,23 +10,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)
; Do not delete stores that are only partially killed.
define i32 @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[V:%.*]] = alloca i32
; CHECK-NEXT: store i32 1234567, i32* [[V]]
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[V]]
; CHECK-NEXT: ret i32 [[X]]
;
%V = alloca i32
store i32 1234567, i32* %V
%V2 = bitcast i32* %V to i8*
store i8 0, i8* %V2
%X = load i32, i32* %V
ret i32 %X
}
; Test for byval handling.
%struct.x = type { i32, i32, i32, i32 }
define void @test9(%struct.x* byval %a) nounwind {
@ -135,18 +118,3 @@ bb1:
bb2:
ret i32 0
}
define void @test43a(i32* %P, i32* noalias %Q) {
; CHECK-LABEL: @test43a(
; CHECK-NEXT: entry:
; CHECK-NEXT: store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
; CHECK-NEXT: ret void
;
entry:
store atomic i32 1, i32* %P unordered, align 4
%P2 = bitcast i32* %P to i8*
store atomic i32 2, i32* %Q unordered, align 4
store atomic i8 3, i8* %P2 unordered, align 4
ret void
}

View file

@ -119,11 +119,27 @@ define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %
ret void
}
; Do not delete stores that are only partially killed.
define i32 @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 1234567, i32* [[V]], align 4
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4
; CHECK-NEXT: ret i32 [[X]]
;
%V = alloca i32
store i32 1234567, i32* %V
%V2 = bitcast i32* %V to i8*
store i8 0, i8* %V2
%X = load i32, i32* %V
ret i32 %X
}
; va_arg has fuzzy dependence, the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i8*
; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i8*, align 8
; CHECK-NEXT: store i8* [[X:%.*]], i8** [[X_ADDR]], align 8
; CHECK-NEXT: [[TMP_0:%.*]] = va_arg i8** [[X_ADDR]], double
; CHECK-NEXT: ret double [[TMP_0]]
@ -579,3 +595,18 @@ define void @test42a(i32* %P, i32* %Q) {
store atomic i8 3, i8* %P2 unordered, align 4
ret void
}
define void @test43a(i32* %P, i32* noalias %Q) {
; CHECK-LABEL: @test43a(
; CHECK-NEXT: entry:
; CHECK-NEXT: store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
; CHECK-NEXT: ret void
;
entry:
store atomic i32 1, i32* %P unordered, align 4
%P2 = bitcast i32* %P to i8*
store atomic i32 2, i32* %Q unordered, align 4
store atomic i8 3, i8* %P2 unordered, align 4
ret void
}