[analyzer] Generalize bitwise AND rules for ranges

Summary: Previously the current solver started reasoning about bitwise AND expressions only when one of the operands is a constant. However, very similar logic could be applied to ranges. This commit addresses this shortcoming. Additionally, it refines how we deal with negative operands. rdar://problem/54359410 Differential Revision: https://reviews.llvm.org/D79434
2020-05-05 19:42:33 +03:00 · 2020-05-05 19:42:33 +03:00 · 2a09daff0f
parent 47c4b8bd68
commit 2a09daff0f
4 changed files with 176 additions and 85 deletions
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@ -430,9 +430,9 @@ private:
                               RangeSet RHS, QualType T) {
    switch (Op) {
    case BO_Or:
-      return VisitOrOperator(LHS, RHS, T);
+      return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
    case BO_And:
-      return VisitAndOperator(LHS, RHS, T);
+      return VisitBinaryOperator<BO_And>(LHS, RHS, T);
    default:
      return infer(T);
    }
@ -464,19 +464,19 @@ private:
                 ValueFactory.Convert(To, Origin.To()));
  }

-  RangeSet VisitOrOperator(RangeSet LHS, RangeSet RHS, QualType T) {
+  template <BinaryOperator::Opcode Op>
+  RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
    // We should propagate information about unfeasbility of one of the
    // operands to the resulting range.
    if (LHS.isEmpty() || RHS.isEmpty()) {
      return RangeFactory.getEmptySet();
    }

-    APSIntType ResultType = ValueFactory.getAPSIntType(T);
-    RangeSet DefaultRange = infer(T);
-
    Range CoarseLHS = fillGaps(LHS);
    Range CoarseRHS = fillGaps(RHS);

+    APSIntType ResultType = ValueFactory.getAPSIntType(T);
+
    // We need to convert ranges to the resulting type, so we can compare values
    // and combine them in a meaningful (in terms of the given operation) way.
    auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
@ -485,74 +485,14 @@ private:
    // It is hard to reason about ranges when conversion changes
    // borders of the ranges.
    if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
-      return DefaultRange;
+      return infer(T);
    }

-    llvm::APSInt Zero = ResultType.getZeroValue();
-
-    bool IsLHSPositiveOrZero = ConvertedCoarseLHS->From() >= Zero;
-    bool IsRHSPositiveOrZero = ConvertedCoarseRHS->From() >= Zero;
-
-    bool IsLHSNegative = ConvertedCoarseLHS->To() < Zero;
-    bool IsRHSNegative = ConvertedCoarseRHS->To() < Zero;
-
-    // Check if both ranges have the same sign.
-    if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
-        (IsLHSNegative && IsRHSNegative)) {
-      // The result is definitely greater or equal than any of the operands.
-      const llvm::APSInt &Min =
-          std::max(ConvertedCoarseLHS->From(), ConvertedCoarseRHS->From());
-
-      // We estimate maximal value for positives as the maximal value for the
-      // given type.  For negatives, we estimate it with -1 (e.g. 0x11111111).
-      //
-      // TODO: We basically, limit the resulting range from below (in absolute
-      //       numbers), but don't do anything with the upper bound.
-      //       For positive operands, it can be done as follows: for the upper
-      //       bound of LHS and RHS we calculate the most significant bit set.
-      //       Let's call it the N-th bit.  Then we can estimate the maximal
-      //       number to be 2^(N+1)-1, i.e. the number with all the bits up to
-      //       the N-th bit set.
-      const llvm::APSInt &Max = IsLHSNegative
-                                    ? ValueFactory.getValue(--Zero)
-                                    : ValueFactory.getMaxValue(ResultType);
-
-      return {RangeFactory, ValueFactory.getValue(Min), Max};
-    }
-
-    // Otherwise, let's check if at least one of the operands is negative.
-    if (IsLHSNegative || IsRHSNegative) {
-      // This means that the result is definitely negative as well.
-      return {RangeFactory, ValueFactory.getMinValue(ResultType),
-              ValueFactory.getValue(--Zero)};
-    }
-
-    // It is pretty hard to reason about operands with different signs
-    // (and especially with possibly different signs).  We simply check if it
-    // can be zero.  In order to conclude that the result could not be zero,
-    // at least one of the operands should be definitely not zero itself.
-    if (!ConvertedCoarseLHS->Includes(Zero) ||
-        !ConvertedCoarseRHS->Includes(Zero)) {
-      return assumeNonZero(DefaultRange, T);
-    }
-
-    // Nothing much else to do here.
-    return DefaultRange;
+    return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
  }

-  RangeSet VisitAndOperator(RangeSet LHS, RangeSet RHS, QualType T) {
-    // TODO: generalize for the ranged RHS.
-    if (const llvm::APSInt *RHSConstant = RHS.getConcreteValue()) {
-      const llvm::APSInt &Zero = ValueFactory.getAPSIntType(T).getZeroValue();
-
-      // For unsigned types, or positive RHS,
-      // bitwise-and output is always smaller-or-equal than RHS (assuming two's
-      // complement representation of signed types).
-      if (T->isUnsignedIntegerType() || *RHSConstant >= Zero) {
-        return LHS.Intersect(ValueFactory, RangeFactory,
-                             ValueFactory.getMinValue(T), *RHSConstant);
-      }
-    }
+  template <BinaryOperator::Opcode Op>
+  RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
    return infer(T);
  }

@ -592,6 +532,109 @@ private:
  ProgramStateRef State;
 };

+template <>
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
+                                                           QualType T) {
+  APSIntType ResultType = ValueFactory.getAPSIntType(T);
+  llvm::APSInt Zero = ResultType.getZeroValue();
+
+  bool IsLHSPositiveOrZero = LHS.From() >= Zero;
+  bool IsRHSPositiveOrZero = RHS.From() >= Zero;
+
+  bool IsLHSNegative = LHS.To() < Zero;
+  bool IsRHSNegative = RHS.To() < Zero;
+
+  // Check if both ranges have the same sign.
+  if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
+      (IsLHSNegative && IsRHSNegative)) {
+    // The result is definitely greater or equal than any of the operands.
+    const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
+
+    // We estimate maximal value for positives as the maximal value for the
+    // given type.  For negatives, we estimate it with -1 (e.g. 0x11111111).
+    //
+    // TODO: We basically, limit the resulting range from below, but don't do
+    //       anything with the upper bound.
+    //
+    //       For positive operands, it can be done as follows: for the upper
+    //       bound of LHS and RHS we calculate the most significant bit set.
+    //       Let's call it the N-th bit.  Then we can estimate the maximal
+    //       number to be 2^(N+1)-1, i.e. the number with all the bits up to
+    //       the N-th bit set.
+    const llvm::APSInt &Max = IsLHSNegative
+                                  ? ValueFactory.getValue(--Zero)
+                                  : ValueFactory.getMaxValue(ResultType);
+
+    return {RangeFactory, ValueFactory.getValue(Min), Max};
+  }
+
+  // Otherwise, let's check if at least one of the operands is negative.
+  if (IsLHSNegative || IsRHSNegative) {
+    // This means that the result is definitely negative as well.
+    return {RangeFactory, ValueFactory.getMinValue(ResultType),
+            ValueFactory.getValue(--Zero)};
+  }
+
+  RangeSet DefaultRange = infer(T);
+
+  // It is pretty hard to reason about operands with different signs
+  // (and especially with possibly different signs).  We simply check if it
+  // can be zero.  In order to conclude that the result could not be zero,
+  // at least one of the operands should be definitely not zero itself.
+  if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
+    return assumeNonZero(DefaultRange, T);
+  }
+
+  // Nothing much else to do here.
+  return DefaultRange;
+}
+
+template <>
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
+                                                            Range RHS,
+                                                            QualType T) {
+  APSIntType ResultType = ValueFactory.getAPSIntType(T);
+  llvm::APSInt Zero = ResultType.getZeroValue();
+
+  bool IsLHSPositiveOrZero = LHS.From() >= Zero;
+  bool IsRHSPositiveOrZero = RHS.From() >= Zero;
+
+  bool IsLHSNegative = LHS.To() < Zero;
+  bool IsRHSNegative = RHS.To() < Zero;
+
+  // Check if both ranges have the same sign.
+  if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
+      (IsLHSNegative && IsRHSNegative)) {
+    // The result is definitely less or equal than any of the operands.
+    const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
+
+    // We conservatively estimate lower bound to be the smallest positive
+    // or negative value corresponding to the sign of the operands.
+    const llvm::APSInt &Min = IsLHSNegative
+                                  ? ValueFactory.getMinValue(ResultType)
+                                  : ValueFactory.getValue(Zero);
+
+    return {RangeFactory, Min, Max};
+  }
+
+  // Otherwise, let's check if at least one of the operands is positive.
+  if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
+    // This makes result definitely positive.
+    //
+    // We can also reason about a maximal value by finding the maximal
+    // value of the positive operand.
+    const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
+
+    // The minimal value on the other hand is much harder to reason about.
+    // The only thing we know for sure is that the result is positive.
+    return {RangeFactory, ValueFactory.getValue(Zero),
+            ValueFactory.getValue(Max)};
+  }
+
+  // Nothing much else to do here.
+  return infer(T);
+}
+
 class RangeConstraintManager : public RangedConstraintManager {
 public:
  RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
--- a/clang/test/Analysis/constant-folding.c
+++ b/clang/test/Analysis/constant-folding.c
@ -78,19 +78,20 @@ void testMixedTypeComparisons (char a, unsigned long b) {
 }

 void testBitwiseRules(unsigned int a, int b, int c) {
-  clang_analyzer_eval((a | 1) >= 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a | 1) >= 1);   // expected-warning{{TRUE}}
  clang_analyzer_eval((a | -1) >= -1); // expected-warning{{TRUE}}
-  clang_analyzer_eval((a | 2) >= 2); // expected-warning{{TRUE}}
-  clang_analyzer_eval((a | 5) >= 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a | 2) >= 2);   // expected-warning{{TRUE}}
+  clang_analyzer_eval((a | 5) >= 5);   // expected-warning{{TRUE}}
  clang_analyzer_eval((a | 10) >= 10); // expected-warning{{TRUE}}

  // Argument order should not influence this
  clang_analyzer_eval((1 | a) >= 1); // expected-warning{{TRUE}}

-  clang_analyzer_eval((a & 1) <= 1); // expected-warning{{TRUE}}
-  clang_analyzer_eval((a & 2) <= 2); // expected-warning{{TRUE}}
-  clang_analyzer_eval((a & 5) <= 5); // expected-warning{{TRUE}}
-  clang_analyzer_eval((a & 10) <= 10); // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 1) <= 1);    // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 1) >= 0);    // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 2) <= 2);    // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 5) <= 5);    // expected-warning{{TRUE}}
+  clang_analyzer_eval((a & 10) <= 10);  // expected-warning{{TRUE}}
  clang_analyzer_eval((a & -10) <= 10); // expected-warning{{UNKNOWN}}

  // Again, check for different argument order.
@ -104,22 +105,37 @@ void testBitwiseRules(unsigned int a, int b, int c) {
  clang_analyzer_eval((b | 1) > 0); // expected-warning{{UNKNOWN}}

  // Even for signed values, bitwise OR with a non-zero is always non-zero.
-  clang_analyzer_eval((b | 1) == 0); // expected-warning{{FALSE}}
+  clang_analyzer_eval((b | 1) == 0);  // expected-warning{{FALSE}}
  clang_analyzer_eval((b | -2) == 0); // expected-warning{{FALSE}}
  clang_analyzer_eval((b | 10) == 0); // expected-warning{{FALSE}}
-  clang_analyzer_eval((b | 0) == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval((b | 0) == 0);  // expected-warning{{UNKNOWN}}
  clang_analyzer_eval((b | -2) >= 0); // expected-warning{{FALSE}}

  // Check that we can operate with negative ranges
  if (b < 0) {
    clang_analyzer_eval((b | -1) == -1);   // expected-warning{{TRUE}}
    clang_analyzer_eval((b | -10) >= -10); // expected-warning{{TRUE}}
+    clang_analyzer_eval((b & 0) == 0);     // expected-warning{{TRUE}}
+    clang_analyzer_eval((b & -10) <= -10); // expected-warning{{TRUE}}
+    clang_analyzer_eval((b & 5) >= 0);     // expected-warning{{TRUE}}

    int e = (b | -5);
    clang_analyzer_eval(e >= -5 && e <= -1); // expected-warning{{TRUE}}

    if (b < -20) {
-      clang_analyzer_eval((b | e) >= -5); // expected-warning{{TRUE}}
+      clang_analyzer_eval((b | e) >= -5);    // expected-warning{{TRUE}}
+      clang_analyzer_eval((b & -10) < -20);  // expected-warning{{TRUE}}
+      clang_analyzer_eval((b & e) < -20);    // expected-warning{{TRUE}}
+      clang_analyzer_eval((b & -30) <= -30); // expected-warning{{TRUE}}
+
+      if (c >= -30 && c <= -10) {
+        clang_analyzer_eval((b & c) <= -20); // expected-warning{{TRUE}}
+      }
+    }
+
+    if (a <= 40) {
+      int g = (int)a & b;
+      clang_analyzer_eval(g <= 40 && g >= 0); // expected-warning{{TRUE}}
    }

    // Check that we can reason about the result even if know nothing
@ -135,6 +151,11 @@ void testBitwiseRules(unsigned int a, int b, int c) {
    // the types are not the same, but we still can convert operand
    // ranges.
    clang_analyzer_eval((a | b) >= 10); // expected-warning{{TRUE}}
+    clang_analyzer_eval((a & b) <= 30); // expected-warning{{TRUE}}
+
+    if (b <= 20) {
+      clang_analyzer_eval((a & b) <= 20); // expected-warning{{TRUE}}
+    }
  }

  // Check that dynamically computed constants also work.
@ -149,11 +170,7 @@ void testBitwiseRules(unsigned int a, int b, int c) {
    clang_analyzer_eval((a | 20) >= 20); // expected-warning{{TRUE}}
  }

-  // TODO: We misuse intersection of ranges for bitwise AND and OR operators.
-  //       Resulting ranges for the following cases are infeasible.
-  //       This is what causes paradoxical results below.
  if (a > 10) {
-    clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
-    clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+    clang_analyzer_eval((a & 1) <= 1); // expected-warning{{TRUE}}
  }
 }
--- a/clang/test/Analysis/switch-case.c
+++ b/clang/test/Analysis/switch-case.c
@ -218,3 +218,14 @@ void testConstant() {
    break;
  }
 }
+
+void testExhaustiveSwitch(unsigned int a) {
+  switch (a & 5) {
+  case 0 ... 5:
+    clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+    break;
+  default:
+    clang_analyzer_warnIfReached(); // no-warning
+    break;
+  }
+}
--- a/clang/test/Analysis/uninit-exhaustive-switch-bug.c
+++ b/clang/test/Analysis/uninit-exhaustive-switch-bug.c
@ -0,0 +1,20 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s
+
+// rdar://problem/54359410
+// expected-no-diagnostics
+
+int rand();
+
+void test() {
+  int offset = 0;
+  int value;
+  int test = rand();
+  switch (test & 0x1) {
+  case 0:
+  case 1:
+    value = 0;
+    break;
+  }
+
+  offset += value; // no-warning
+}