From 42098c4a30de4c79f6d1119d9557e9c3da2fe90c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 7 Jan 2022 17:44:23 -0500 Subject: [PATCH] GlobalISel: Fix legalization error where CSE leaves behind dead defs If the conversion artifact introduced in the unmerge of cast of merge combine already existed in the function, this would introduce dead copies which kept the old casts around, neither of which were deleted, and would fail legalization. This would fail as follows: The G_UNMERGE_VALUES of the G_SEXT of the G_BUILD_VECTOR would introduce a G_SEXT for each of the scalars. Some of the required G_SEXTs already existed in the function, so CSE moves them up in the function and introduces a copy to the original result register. The introduced CSE copies are dead, since the original G_SEXTs were already directly used. These copies add a use to the illegal G_SEXTs, so they are not deleted. The artifact combiner does not see the defs that need to be updated, since it was hidden inside the CSE builder. I see 2 potential fixes, and opted for the mechanically simpler one, which is to just not insert the cast if the result operand isn't used. Alternatively, we could not insert the cast directly into the result register, and use replaceRegOrBuildCopy similar to the case where there is no conversion. I suspect this is a wider problem in the artifact combiner. 
--- .../GlobalISel/LegalizationArtifactCombiner.h | 9 +- ...artifact-combiner-cse-leaves-dead-cast.mir | 114 ++++++++++++++++++ 2 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 886b3af834d7..e82b2c5d008c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -977,10 +977,13 @@ public: Builder.setInstr(MI); for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { - Register MergeSrc = MergeI->getOperand(Idx + 1).getReg(); Register DefReg = MI.getOperand(Idx).getReg(); - Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); - UpdatedDefs.push_back(DefReg); + Register MergeSrc = MergeI->getOperand(Idx + 1).getReg(); + + if (!MRI.use_empty(DefReg)) { + Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); + UpdatedDefs.push_back(DefReg); + } } markInstAndDefDead(MI, *MergeI, DeadInsts); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir new file mode 100644 index 000000000000..b99b4cf54c5b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-cse-leaves-dead-cast.mir @@ -0,0 +1,114 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck %s + +# The G_UNMERGE_VALUES of the G_SEXT of G_BUILD_VECTOR will introduce +# a new G_SEXT for each of the scalars. 
The sext of %and[4-7] already +# exist, so the CSE MIR builder in the artifact combiner would re-use +# those instructions and introduce dead copies which were never +# deleted, and also kept the illegal %sext[4-7] alive which would fail +# legalization. + +--- +name: artifact_combiner_sext_already_exists +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: artifact_combiner_sext_already_exists + ; CHECK: %undef:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: %load:_(s32) = G_LOAD %undef(p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) + ; CHECK-NEXT: %unmerge3_0:_(s1) = G_TRUNC %load(s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR %load, [[C]](s32) + ; CHECK-NEXT: %unmerge3_1:_(s1) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR %load, [[C1]](s32) + ; CHECK-NEXT: %unmerge3_2:_(s1) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR %load, [[C2]](s32) + ; CHECK-NEXT: %unmerge3_3:_(s1) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR %load, [[C3]](s32) + ; CHECK-NEXT: %unmerge3_4:_(s1) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR %load, [[C4]](s32) + ; CHECK-NEXT: %unmerge3_5:_(s1) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR %load, [[C5]](s32) + ; CHECK-NEXT: %unmerge3_6:_(s1) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR %load, [[C6]](s32) + ; CHECK-NEXT: %unmerge3_7:_(s1) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: 
[[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: %negone:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: %and0:_(s1) = G_XOR %unmerge3_0, %negone + ; CHECK-NEXT: %and1:_(s1) = G_XOR %unmerge3_1, %negone + ; CHECK-NEXT: %and2:_(s1) = G_XOR %unmerge3_2, %negone + ; CHECK-NEXT: %and3:_(s1) = G_XOR %unmerge3_3, %negone + ; CHECK-NEXT: %and4:_(s1) = G_XOR %unmerge3_4, %negone + ; CHECK-NEXT: %and5:_(s1) = G_XOR %unmerge3_5, %negone + ; CHECK-NEXT: %and6:_(s1) = G_XOR %unmerge3_6, %negone + ; CHECK-NEXT: %and7:_(s1) = G_XOR %unmerge3_7, %negone + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT %and0(s1) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT]], [[C10]] + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT %and1(s1) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT1]], [[C10]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT %and2(s1) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT2]], [[C10]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C8]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT %and3(s1) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT3]], [[C10]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) + ; CHECK-NEXT: %merge0:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT %and4(s1) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT4]], [[C10]] + ; CHECK-NEXT: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT %and5(s1) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT5]], [[C10]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[SEXT6:%[0-9]+]]:_(s32) = 
G_SEXT %and6(s1) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SEXT6]], [[C10]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL4]] + ; CHECK-NEXT: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT %and7(s1) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SEXT7]], [[C10]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) + ; CHECK-NEXT: %merge1:_(s32) = G_OR [[OR3]], [[SHL5]] + ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32) + ; CHECK-NEXT: %null:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: G_STORE %bv(<2 x s32>), %null(p1) :: (store (<2 x s32>), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %undef:_(p4) = G_IMPLICIT_DEF + %load:_(s32) = G_LOAD %undef :: (dereferenceable invariant load (s8), align 16, addrspace 4) + %trunc:_(s8) = G_TRUNC %load + %unmerge3_0:_(s1), %unmerge3_1:_(s1), %unmerge3_2:_(s1), %unmerge3_3:_(s1), %unmerge3_4:_(s1), %unmerge3_5:_(s1), %unmerge3_6:_(s1), %unmerge3_7:_(s1) = G_UNMERGE_VALUES %trunc + %negone:_(s1) = G_CONSTANT i1 true + %and0:_(s1) = G_XOR %unmerge3_0, %negone + %and1:_(s1) = G_XOR %unmerge3_1, %negone + %and2:_(s1) = G_XOR %unmerge3_2, %negone + %and3:_(s1) = G_XOR %unmerge3_3, %negone + %and4:_(s1) = G_XOR %unmerge3_4, %negone + %and5:_(s1) = G_XOR %unmerge3_5, %negone + %and6:_(s1) = G_XOR %unmerge3_6, %negone + %and7:_(s1) = G_XOR %unmerge3_7, %negone + %boolvec:_(<8 x s1>) = G_BUILD_VECTOR %and0(s1), %and1(s1), %and2(s1), %and3(s1), %and4(s1), %and5(s1), %and6(s1), %and7(s1) + %sext:_(<8 x s8>) = G_SEXT %boolvec(<8 x s1>) + %sext_lo:_(<4 x s8>), %sext_hi:_(<4 x s8>) = G_UNMERGE_VALUES %sext(<8 x s8>) + %sext0:_(s8), %sext1:_(s8), %sext2:_(s8), %sext3:_(s8) = G_UNMERGE_VALUES %sext_lo(<4 x s8>) + %merge0:_(s32) = G_MERGE_VALUES %sext0(s8), %sext1(s8), %sext2(s8), %sext3(s8) + %sext4:_(s8) = G_SEXT %and4(s1) + %sext5:_(s8) = G_SEXT %and5(s1) + %sext6:_(s8) = G_SEXT %and6(s1) + %sext7:_(s8) = G_SEXT %and7(s1) + 
%merge1:_(s32) = G_MERGE_VALUES %sext4, %sext5, %sext6, %sext7 + %bv:_(<2 x s32>) = G_BUILD_VECTOR %merge0(s32), %merge1(s32) + %null:_(p1) = G_CONSTANT i64 0 + G_STORE %bv(<2 x s32>), %null :: (store (<2 x s32>), addrspace 1) + S_ENDPGM 0 + +...