[Analyzer][solver] Simplification: Do a fixpoint iteration before the eq class merge
This reverts commit f02c5f3478
and
addresses the issue mentioned in D114619 differently.
Repeating the issue here:
Currently, during symbol simplification we remove the original member
symbol from the equivalence class (`ClassMembers` trait). However, we
keep the reverse link (`ClassMap` trait), in order to be able the query
the related constraints even for the old member. This asymmetry can lead
to a problem when we merge equivalence classes:
```
ClassA: [a, b] // ClassMembers trait,
a->a, b->a // ClassMap trait, a is the representative symbol
```
Now let,s delete `a`:
```
ClassA: [b]
a->a, b->a
```
Let's merge ClassA into the trivial class `c`:
```
ClassA: [c, b]
c->c, b->c, a->a
```
Now, after the merge operation, `c` and `a` are actually in different
equivalence classes, which is inconsistent.
This issue manifests in a test case (added in D103317):
```
void recurring_symbol(int b) {
if (b * b != b)
if ((b * b) * b * b != (b * b) * b)
if (b * b == 1)
}
```
Before the simplification we have these equivalence classes:
```
trivial EQ1: [b * b != b]
trivial EQ2: [(b * b) * b * b != (b * b) * b]
```
During the simplification with `b * b == 1`, EQ1 is merged with `1 != b`
`EQ1: [b * b != b, 1 != b]` and we remove the complex symbol, so
`EQ1: [1 != b]`
Then we start to simplify the only symbol in EQ2:
`(b * b) * b * b != (b * b) * b --> 1 * b * b != 1 * b --> b * b != b`
But `b * b != b` is such a symbol that had been removed previously from
EQ1, thus we reach the above mentioned inconsistency.
This patch addresses the issue by making it impossible to synthesise a
symbol that had been simplified before. We achieve this by simplifying
the given symbol to the absolute simplest form.
Differential Revision: https://reviews.llvm.org/D114887
This commit is contained in:
parent
ad88a37cea
commit
20f8733d4b
|
@ -601,6 +601,10 @@ public:
|
|||
LLVM_NODISCARD static inline Optional<bool>
|
||||
areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
|
||||
|
||||
/// Remove one member from the class.
|
||||
LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State,
|
||||
const SymbolRef Old);
|
||||
|
||||
/// Iterate over all symbols and try to simplify them.
|
||||
LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB,
|
||||
RangeSet::Factory &F,
|
||||
|
@ -2132,6 +2136,34 @@ inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
|
|||
return llvm::None;
|
||||
}
|
||||
|
||||
LLVM_NODISCARD ProgramStateRef
|
||||
EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
|
||||
|
||||
SymbolSet ClsMembers = getClassMembers(State);
|
||||
assert(ClsMembers.contains(Old));
|
||||
|
||||
// We don't remove `Old`'s Sym->Class relation for two reasons:
|
||||
// 1) This way constraints for the old symbol can still be found via it's
|
||||
// equivalence class that it used to be the member of.
|
||||
// 2) Performance and resource reasons. We can spare one removal and thus one
|
||||
// additional tree in the forest of `ClassMap`.
|
||||
|
||||
// Remove `Old`'s Class->Sym relation.
|
||||
SymbolSet::Factory &F = getMembersFactory(State);
|
||||
ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
|
||||
ClsMembers = F.remove(ClsMembers, Old);
|
||||
// Ensure another precondition of the removeMember function (we can check
|
||||
// this only with isEmpty, thus we have to do the remove first).
|
||||
assert(!ClsMembers.isEmpty() &&
|
||||
"Class should have had at least two members before member removal");
|
||||
// Overwrite the existing members assigned to this class.
|
||||
ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
|
||||
ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
|
||||
State = State->set<ClassMembers>(ClassMembersMap);
|
||||
|
||||
return State;
|
||||
}
|
||||
|
||||
// Re-evaluate an SVal with top-level `State->assume` logic.
|
||||
LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
|
||||
const RangeSet *Constraint,
|
||||
|
@ -2159,6 +2191,42 @@ LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
|
|||
Constraint->getMaxValue(), true);
|
||||
}
|
||||
|
||||
// Simplify the given symbol with the help of the SValBuilder. In
|
||||
// SValBuilder::symplifySval, we traverse the symbol tree and query the
|
||||
// constraint values for the sub-trees and if a value is a constant we do the
|
||||
// constant folding. Compound symbols might collapse to simpler symbol tree
|
||||
// that is still possible to further simplify. Thus, we do the simplification on
|
||||
// a new symbol tree until we reach the simplest form, i.e. the fixpoint.
|
||||
//
|
||||
// Consider the following symbol `(b * b) * b * b` which has this tree:
|
||||
// *
|
||||
// / \
|
||||
// * b
|
||||
// / \
|
||||
// / b
|
||||
// (b * b)
|
||||
// Now, if the `b * b == 1` new constraint is added then during the first
|
||||
// iteration we have the following transformations:
|
||||
// * *
|
||||
// / \ / \
|
||||
// * b --> b b
|
||||
// / \
|
||||
// / b
|
||||
// 1
|
||||
// We need another iteration to reach the final result `1`.
|
||||
LLVM_NODISCARD
|
||||
static SVal simplifyUntilFixpoint(SValBuilder &SVB, ProgramStateRef State,
|
||||
const SymbolRef Sym) {
|
||||
SVal Val = SVB.makeSymbolVal(Sym);
|
||||
SVal SimplifiedVal = SVB.simplifySVal(State, Val);
|
||||
// Do the simplification until we can.
|
||||
while (SimplifiedVal != Val) {
|
||||
Val = SimplifiedVal;
|
||||
SimplifiedVal = SVB.simplifySVal(State, Val);
|
||||
}
|
||||
return SimplifiedVal;
|
||||
}
|
||||
|
||||
// Iterate over all symbols and try to simplify them. Once a symbol is
|
||||
// simplified then we check if we can merge the simplified symbol's equivalence
|
||||
// class to this class. This way, we simplify not just the symbols but the
|
||||
|
@ -2170,7 +2238,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
|
|||
SymbolSet ClassMembers = Class.getClassMembers(State);
|
||||
for (const SymbolRef &MemberSym : ClassMembers) {
|
||||
|
||||
const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
|
||||
const SVal SimplifiedMemberVal =
|
||||
simplifyUntilFixpoint(SVB, State, MemberSym);
|
||||
const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
|
||||
|
||||
// The symbol is collapsed to a constant, check if the current State is
|
||||
|
@ -2196,6 +2265,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
|
|||
continue;
|
||||
|
||||
assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
|
||||
// Remove the old and more complex symbol.
|
||||
State = find(State, MemberSym).removeMember(State, MemberSym);
|
||||
|
||||
// Query the class constraint again b/c that may have changed during the
|
||||
// merge above.
|
||||
|
@ -2207,25 +2278,19 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
|
|||
// About performance and complexity: Let us assume that in a State we
|
||||
// have N non-trivial equivalence classes and that all constraints and
|
||||
// disequality info is related to non-trivial classes. In the worst case,
|
||||
// we can simplify only one symbol of one class in each iteration.
|
||||
// we can simplify only one symbol of one class in each iteration. The
|
||||
// number of symbols in one class cannot grow b/c we replace the old
|
||||
// symbol with the simplified one. Also, the number of the equivalence
|
||||
// classes can decrease only, b/c the algorithm does a merge operation
|
||||
// optionally. We need N iterations in this case to reach the fixpoint.
|
||||
// Thus, the steps needed to be done in the worst case is proportional to
|
||||
// N*N.
|
||||
//
|
||||
// The number of the equivalence classes can decrease only, because the
|
||||
// algorithm does a merge operation optionally.
|
||||
// ASSUMPTION G: Let us assume that the
|
||||
// number of symbols in one class cannot grow because we replace the old
|
||||
// symbol with the simplified one.
|
||||
// If assumption G holds then we need N iterations in this case to reach
|
||||
// the fixpoint. Thus, the steps needed to be done in the worst case is
|
||||
// proportional to N*N.
|
||||
// This worst case scenario can be extended to that case when we have
|
||||
// trivial classes in the constraints and in the disequality map. This
|
||||
// case can be reduced to the case with a State where there are only
|
||||
// non-trivial classes. This is because a merge operation on two trivial
|
||||
// classes results in one non-trivial class.
|
||||
//
|
||||
// Empirical measurements show that if we relax assumption G (i.e. if we
|
||||
// do not replace the complex symbol just add the simplified one) then
|
||||
// the runtime and memory consumption does not grow noticeably.
|
||||
State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
|
||||
if (!State)
|
||||
return nullptr;
|
||||
|
|
|
@ -16,6 +16,6 @@ void test_equivalence_classes(int a, int b, int c, int d) {
|
|||
}
|
||||
|
||||
// CHECK: "equivalence_classes": [
|
||||
// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "(reg_$0<int a>) != (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$1<int b>)", "reg_$0<int a>", "reg_$2<int c>", "reg_$3<int d>" ]
|
||||
// CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: [ "reg_$0<int a>", "reg_$2<int c>", "reg_$3<int d>" ]
|
||||
// CHECK-NEXT: ],
|
||||
|
|
|
@ -12,18 +12,18 @@ void test(int a, int b, int c, int d) {
|
|||
if (a + b + c == d)
|
||||
return;
|
||||
clang_analyzer_printState();
|
||||
// CHECK: "disequality_info": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "reg_$3<int d>" ]]
|
||||
// CHECK-NEXT: },
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "reg_$3<int d>" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ]]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK: "disequality_info": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "reg_$3<int d>" ]]
|
||||
// CHECK-NEXT: },
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "reg_$3<int d>" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)" ]]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ],
|
||||
|
||||
|
||||
// Simplification starts here.
|
||||
|
@ -32,33 +32,32 @@ void test(int a, int b, int c, int d) {
|
|||
clang_analyzer_printState();
|
||||
// CHECK: "disequality_info": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: "class": [ "(reg_$0<int a>) + (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "reg_$3<int d>" ]]
|
||||
// CHECK-NEXT: },
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "reg_$3<int d>" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)" ]]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$2<int c>)" ]]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ],
|
||||
|
||||
if (c != 0)
|
||||
return;
|
||||
clang_analyzer_printState();
|
||||
|
||||
// CHECK: "disequality_info": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)", "reg_$0<int a>" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "reg_$3<int d>" ]]
|
||||
// CHECK-NEXT: },
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "reg_$3<int d>" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)", "reg_$0<int a>" ]]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK: "disequality_info": [
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "reg_$0<int a>" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "reg_$3<int d>" ]]
|
||||
// CHECK-NEXT: },
|
||||
// CHECK-NEXT: {
|
||||
// CHECK-NEXT: "class": [ "reg_$3<int d>" ],
|
||||
// CHECK-NEXT: "disequal_to": [
|
||||
// CHECK-NEXT: [ "reg_$0<int a>" ]]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: ],
|
||||
|
||||
// Keep the symbols and the constraints! alive.
|
||||
(void)(a * b * c * d);
|
||||
|
|
|
@ -24,15 +24,14 @@ void test(int a, int b, int c) {
|
|||
if (b != 0)
|
||||
return;
|
||||
clang_analyzer_printState();
|
||||
// CHECK: "constraints": [
|
||||
// CHECK-NEXT: { "symbol": "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$2<int c>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" }
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: "equivalence_classes": [
|
||||
// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) != (reg_$2<int c>)", "(reg_$0<int a>) != (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: [ "(reg_$0<int a>) + (reg_$1<int b>)", "reg_$0<int a>", "reg_$2<int c>" ]
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK: "constraints": [
|
||||
// CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$2<int c>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" }
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: "equivalence_classes": [
|
||||
// CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$2<int c>)" ],
|
||||
// CHECK-NEXT: [ "reg_$0<int a>", "reg_$2<int c>" ]
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: "disequality_info": null,
|
||||
|
||||
// Keep the symbols and the constraints! alive.
|
||||
|
|
|
@ -27,20 +27,17 @@ void test(int a, int b, int c, int d) {
|
|||
if (b != 0)
|
||||
return;
|
||||
clang_analyzer_printState();
|
||||
// CHECK: "constraints": [
|
||||
// CHECK-NEXT: { "symbol": "(((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "((reg_$0<int a>) + (reg_$2<int c>)) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "(reg_$2<int c>) + (reg_$1<int b>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "reg_$2<int c>", "range": "{ [0, 0] }" }
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: "equivalence_classes": [
|
||||
// CHECK-NEXT: [ "(((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)) != (reg_$3<int d>)", "((reg_$0<int a>) + (reg_$2<int c>)) != (reg_$3<int d>)", "(reg_$0<int a>) != (reg_$3<int d>)" ],
|
||||
// CHECK-NEXT: [ "((reg_$0<int a>) + (reg_$1<int b>)) + (reg_$2<int c>)", "(reg_$0<int a>) + (reg_$2<int c>)", "reg_$0<int a>", "reg_$3<int d>" ],
|
||||
// CHECK-NEXT: [ "(reg_$2<int c>) + (reg_$1<int b>)", "reg_$2<int c>" ]
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: "disequality_info": null,
|
||||
// CHECK: "constraints": [
|
||||
// CHECK-NEXT: { "symbol": "(reg_$0<int a>) != (reg_$3<int d>)", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "reg_$1<int b>", "range": "{ [0, 0] }" },
|
||||
// CHECK-NEXT: { "symbol": "reg_$2<int c>", "range": "{ [0, 0] }" }
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: "equivalence_classes": [
|
||||
// CHECK-NEXT: [ "(reg_$0<int a>) != (reg_$3<int d>)" ],
|
||||
// CHECK-NEXT: [ "reg_$0<int a>", "reg_$3<int d>" ],
|
||||
// CHECK-NEXT: [ "reg_$2<int c>" ]
|
||||
// CHECK-NEXT: ],
|
||||
// CHECK-NEXT: "disequality_info": null,
|
||||
|
||||
// Keep the symbols and the constraints! alive.
|
||||
(void)(a * b * c * d);
|
||||
|
|
Loading…
Reference in a new issue