llvm/mlir/tools/mlir-opt/mlir-opt.cpp

//===- mlir-opt.cpp - MLIR Optimizer Driver -------------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This is a command line utility that parses an MLIR file, runs an optimization
// pass, then prints the result back out. It is designed to support unit
// testing.
//
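// Example usage (flags defined below; file names are illustrative):
//   mlir-opt -canonicalize input.mlir -o output.mlir
//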
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/Passes.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/CFGFunction.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLFunction.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Module.h"
#include "mlir/Parser.h"
#include "mlir/Pass.h"
#include "mlir/TensorFlow/ControlFlowOps.h"
#include "mlir/TensorFlow/Passes.h"
#include "mlir/TensorFlowLite/Passes.h"
#include "mlir/Transforms/CFGFunctionViewGraph.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/XLA/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"

using namespace mlir;
using namespace llvm;
using llvm::SMLoc;

static cl::opt<std::string>
    inputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));

static cl::opt<std::string>
    outputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"),
                   cl::init("-"));

static cl::opt<bool>
    splitInputFile("split-input-file",
                   cl::desc("Split the input file into pieces and process each "
                            "chunk independently"),
                   cl::init(false));

static cl::opt<bool>
    verifyDiagnostics("verify",
                      cl::desc("Check that emitted diagnostics match "
                               "expected-* lines on the corresponding line"),
                      cl::init(false));
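
/// The set of passes selectable on the command line; each value corresponds to
/// a pass constructor dispatched in performActions() below.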
enum Passes {
  Canonicalize,
  ComposeAffineMaps,
  ConstantFold,
  ConvertToCFG,
  TFLiteLegalize,
  LoopFusion,
  LoopTiling,
  LoopUnroll,
  LoopUnrollAndJam,
  MemRefBoundCheck,
  MemRefDependenceCheck,
  PipelineDataTransfer,
  PrintCFGGraph,
  SimplifyAffineStructures,
  TFRaiseControlFlow,
  Vectorize,
  XLALower,
};
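
/// The compiler passes requested on the command line, run in the order
/// specified.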
static cl::list<Passes> passList(
    "", cl::desc("Compiler passes to run"),
    cl::values(
        clEnumValN(Canonicalize, "canonicalize", "Canonicalize operations"),
        clEnumValN(ComposeAffineMaps, "compose-affine-maps",
                   "Compose affine maps"),
        clEnumValN(ConstantFold, "constant-fold",
                   "Constant fold operations in functions"),
        clEnumValN(ConvertToCFG, "convert-to-cfg",
                   "Convert all ML functions in the module to CFG ones"),
        clEnumValN(LoopFusion, "loop-fusion", "Fuse loop nests"),
        clEnumValN(LoopTiling, "loop-tile", "Tile loop nests"),
        clEnumValN(LoopUnroll, "loop-unroll", "Unroll loops"),
        clEnumValN(LoopUnrollAndJam, "loop-unroll-jam", "Unroll and jam loops"),
        clEnumValN(MemRefBoundCheck, "memref-bound-check",
                   "Check memref accesses for out of bounds"),
        clEnumValN(MemRefDependenceCheck, "memref-dependence-check",
                   "Check dependences between all pairs of memref accesses"),
        clEnumValN(PipelineDataTransfer, "pipeline-data-transfer",
                   "Pipeline non-blocking data transfers between "
                   "explicitly managed levels of the memory hierarchy"),
        clEnumValN(PrintCFGGraph, "print-cfg-graph",
                   "Print CFG graph per function"),
        clEnumValN(SimplifyAffineStructures, "simplify-affine-structures",
                   "Simplify affine expressions"),
        clEnumValN(TFLiteLegalize, "tfl-legalize",
                   "Legalize operations to TensorFlow Lite dialect"),
        clEnumValN(TFRaiseControlFlow, "tf-raise-control-flow",
                   "Raise dynamic TensorFlow Switch/Merge nodes to a CFG"),
        clEnumValN(Vectorize, "vectorize",
                   "Vectorize to a target independent n-D vector abstraction"),
        clEnumValN(XLALower, "xla-lower", "Lower to XLA dialect")));
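
/// Result of processing an input file. main() returns this directly, so
/// OptSuccess (0) and OptFailure (1) double as the process exit code.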
enum OptResult { OptSuccess, OptFailure };

/// Open the specified output file and return it, exiting if there is any I/O
/// or other error.
static std::unique_ptr<ToolOutputFile> getOutputStream() {
  std::error_code error;
  auto result =
      llvm::make_unique<ToolOutputFile>(outputFilename, error, sys::fs::F_None);
  if (error) {
    llvm::errs() << error.message() << '\n';
    exit(1);
  }
  return result;
}

/// Given a MemoryBuffer along with a line and column within it, return the
/// location being referenced.
static SMLoc getLocFromLineAndCol(MemoryBuffer &membuf, unsigned lineNo,
                                  unsigned columnNo) {
  // TODO: This should really be upstreamed to be a method on llvm::SourceMgr.
  // Doing so would allow it to use the offset cache that is already maintained
  // by SrcBuffer, making this more efficient.

  // Scan for the correct line number.
  const char *position = membuf.getBufferStart();
  const char *end = membuf.getBufferEnd();

  // We start counting line and column numbers from 1.
  --lineNo;
  --columnNo;

  while (position < end && lineNo) {
    auto curChar = *position++;

    // Scan for newlines. If this isn't one, ignore it.
    if (curChar != '\r' && curChar != '\n')
      continue;

    // We saw a line break, so decrement our counter.
    --lineNo;

    // Check for \r\n and \n\r and treat it as a single newline. We know that
    // looking past one character is safe because MemoryBuffers are always nul
    // terminated.
    if (*position != curChar && (*position == '\r' || *position == '\n'))
      ++position;
  }

  // If the line/column counter was invalid, return a pointer to the start of
  // the buffer.
  if (lineNo || position + columnNo > end)
    return SMLoc::getFromPointer(membuf.getBufferStart());

  // Otherwise return the right pointer.
  return SMLoc::getFromPointer(position + columnNo);
}

/// Perform the actions on the input file indicated by the command line flags
/// within the specified context.
///
/// This typically parses the main source file, runs zero or more optimization
/// passes, then prints the output.
///
static OptResult performActions(SourceMgr &sourceMgr, MLIRContext *context) {
  std::unique_ptr<Module> module(parseSourceFile(sourceMgr, context));
  if (!module)
    return OptFailure;

  // Run each of the passes that were selected.
  for (unsigned i = 0, e = passList.size(); i != e; ++i) {
    auto passKind = passList[i];
    Pass *pass = nullptr;
    switch (passKind) {
    case Canonicalize:
      pass = createCanonicalizerPass();
      break;
    case ComposeAffineMaps:
      pass = createComposeAffineMapsPass();
      break;
    case ConstantFold:
      pass = createConstantFoldPass();
      break;
    case ConvertToCFG:
      pass = createConvertToCFGPass();
      break;
    case LoopFusion:
      pass = createLoopFusionPass();
      break;
    case LoopTiling:
      pass = createLoopTilingPass();
      break;
    case LoopUnroll:
      pass = createLoopUnrollPass();
      break;
    case LoopUnrollAndJam:
      pass = createLoopUnrollAndJamPass();
      break;
    case MemRefBoundCheck:
      pass = createMemRefBoundCheckPass();
      break;
    case MemRefDependenceCheck:
      pass = createMemRefDependenceCheckPass();
      break;
    case PipelineDataTransfer:
      pass = createPipelineDataTransferPass();
      break;
    case PrintCFGGraph:
      pass = createPrintCFGGraphPass();
      break;
    case SimplifyAffineStructures:
      pass = createSimplifyAffineStructuresPass();
      break;
    case TFLiteLegalize:
      pass = tfl::createLegalizer();
      break;
    case TFRaiseControlFlow:
      pass = createRaiseTFControlFlowPass();
      break;
    case Vectorize:
      pass = createVectorizePass();
      break;
    case XLALower:
      pass = createXLALowerPass();
      break;
    }

    PassResult result = pass->runOnModule(module.get());
    delete pass;
    if (result)
      return OptFailure;

    // Verify that the result of the pass is still valid.
    if (module->verify())
      return OptFailure;
  }

  // Print the output.
  auto output = getOutputStream();
  module->print(output->os());
  output->keep();
  return OptSuccess;
}

/// Given a diagnostic kind, return a human readable string for it.
static StringRef getDiagnosticKindString(MLIRContext::DiagnosticKind kind) {
  switch (kind) {
  case MLIRContext::DiagnosticKind::Note:
    return "note";
  case MLIRContext::DiagnosticKind::Warning:
    return "warning";
  case MLIRContext::DiagnosticKind::Error:
    return "error";
  }
}

/// Parse the memory buffer and, if successful, run a series of passes against
/// it and print the result.
static OptResult processFile(std::unique_ptr<MemoryBuffer> ownedBuffer) {
  // Tell sourceMgr about this buffer, which is what the parser will pick up.
  SourceMgr sourceMgr;
  auto &buffer = *ownedBuffer;
  sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), SMLoc());

  // Parse the input file.
  MLIRContext context;

  // If we are in verify mode then we have a lot of work to do, otherwise just
  // perform the actions without worrying about it.
  if (!verifyDiagnostics) {
    // Register a simple diagnostic handler that prints out info with context.
    context.registerDiagnosticHandler(
        [&](Location *location, StringRef message,
            MLIRContext::DiagnosticKind kind) {
          unsigned line = 1, column = 1;
          if (auto *fileLoc = dyn_cast<FileLineColLoc>(location)) {
            line = fileLoc->getLine();
            column = fileLoc->getColumn();
          }
          auto unexpectedLoc = getLocFromLineAndCol(buffer, line, column);
          sourceMgr.PrintMessage(unexpectedLoc, SourceMgr::DK_Error, message);
        });

    // Run the test actions.
    return performActions(sourceMgr, &context);
  }

  // Keep track of the result of this file processing. If there are no issues,
  // then we succeed.
  auto result = OptSuccess;

  // Record the expected diagnostic's position, substring and whether it was
  // seen.
  struct ExpectedDiag {
    MLIRContext::DiagnosticKind kind;
    unsigned lineNo;
    StringRef substring;
    SMLoc fileLoc;
    bool matched = false;
  };
  SmallVector<ExpectedDiag, 2> expectedDiags;

  // Error checker that verifies that the reported error was expected.
  auto checker = [&](Location *location, StringRef message,
                     MLIRContext::DiagnosticKind kind) {
    unsigned line = 1, column = 1;
    if (auto *fileLoc = dyn_cast<FileLineColLoc>(location)) {
      line = fileLoc->getLine();
      column = fileLoc->getColumn();
    }

    // If we find something that is close then emit a more specific error.
    ExpectedDiag *nearMiss = nullptr;

    // If this was an expected error, remember that we saw it and return.
    for (auto &e : expectedDiags) {
      if (line == e.lineNo && message.contains(e.substring)) {
        if (e.kind == kind) {
          e.matched = true;
          return;
        }

        // If this only differs based on the diagnostic kind, then consider it
        // to be a near miss.
        nearMiss = &e;
      }
    }

    // If there was a near miss, emit a specific diagnostic.
    if (nearMiss) {
      sourceMgr.PrintMessage(nearMiss->fileLoc, SourceMgr::DK_Error,
                             "'" + getDiagnosticKindString(kind) +
                                 "' diagnostic emitted when expecting a '" +
                                 getDiagnosticKindString(nearMiss->kind) + "'");
      result = OptFailure;
      return;
    }

    // If this error wasn't expected, produce an error out of mlir-opt saying
    // so.
    auto unexpectedLoc = getLocFromLineAndCol(buffer, line, column);
    sourceMgr.PrintMessage(unexpectedLoc, SourceMgr::DK_Error,
                           "unexpected error: " + Twine(message));
    result = OptFailure;
  };

  // Scan the file for expected-* designators and extract the expected errors
  // from it; the checker callback registered below verifies them.
  llvm::Regex expected(
      "expected-(error|note|warning) *(@[+-][0-9]+)? *{{(.*)}}");
  SmallVector<StringRef, 100> lines;
  buffer.getBuffer().split(lines, '\n');
  for (unsigned lineNo = 0, e = lines.size(); lineNo < e; ++lineNo) {
    SmallVector<StringRef, 3> matches;
    if (expected.match(lines[lineNo], &matches)) {
      // Point to the start of expected-*.
      SMLoc expectedStart = SMLoc::getFromPointer(matches[0].data());

      MLIRContext::DiagnosticKind kind;
      if (matches[1] == "error")
        kind = MLIRContext::DiagnosticKind::Error;
      else if (matches[1] == "warning")
        kind = MLIRContext::DiagnosticKind::Warning;
      else {
        assert(matches[1] == "note");
        kind = MLIRContext::DiagnosticKind::Note;
      }

      ExpectedDiag record{kind, lineNo + 1, matches[3], expectedStart, false};
      auto offsetMatch = matches[2];
      if (!offsetMatch.empty()) {
        int offset;
        // Get the integer value without the @ and +/- prefix.
        if (!offsetMatch.drop_front(2).getAsInteger(0, offset)) {
          if (offsetMatch[1] == '+')
            record.lineNo += offset;
          else
            record.lineNo -= offset;
        }
      }
      expectedDiags.push_back(record);
    }
  }

  // Finally, register the error handler to capture them.
  context.registerDiagnosticHandler(checker);

  // Do any processing requested by command line flags. We don't care whether
  // these actions succeed or fail; we only care what diagnostics they produce
  // and whether they match our expectations.
  performActions(sourceMgr, &context);

  // Verify that all expected errors were seen.
  for (auto &err : expectedDiags) {
    if (!err.matched) {
      SMRange range(err.fileLoc,
                    SMLoc::getFromPointer(err.fileLoc.getPointer() +
                                          err.substring.size()));
      auto kind = getDiagnosticKindString(err.kind);
      sourceMgr.PrintMessage(err.fileLoc, SourceMgr::DK_Error,
                             "expected " + kind + " \"" + err.substring +
                                 "\" was not produced",
                             range);
      result = OptFailure;
    }
  }

  return result;
}

/// Split the specified file on a marker and process each chunk independently
/// according to the normal processFile logic. This is primarily used to
/// allow a large number of small independent parser tests to be put into a
/// single test, but could be used for other purposes as well.
static OptResult
splitAndProcessFile(std::unique_ptr<MemoryBuffer> originalBuffer) {
  const char marker[] = "-----";
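  // Each occurrence of the marker string in the input delimits a chunk; every
  // chunk is then parsed and processed as if it were its own input file.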
  auto *origMemBuffer = originalBuffer.get();
  SmallVector<StringRef, 8> sourceBuffers;
  origMemBuffer->getBuffer().split(sourceBuffers, marker);

  // Add the original buffer to the source manager.
  SourceMgr fileSourceMgr;
  fileSourceMgr.AddNewSourceBuffer(std::move(originalBuffer), SMLoc());

  bool hadUnexpectedResult = false;

  // Process each chunk in turn. If any fails, then return a failure of the
  // tool.
  for (auto &subBuffer : sourceBuffers) {
    auto splitLoc = SMLoc::getFromPointer(subBuffer.data());
    unsigned splitLine = fileSourceMgr.getLineAndColumn(splitLoc).first;
    auto subMemBuffer = MemoryBuffer::getMemBufferCopy(
        subBuffer, origMemBuffer->getBufferIdentifier() +
                       Twine(" split at line #") + Twine(splitLine));
    if (processFile(std::move(subMemBuffer)))
      hadUnexpectedResult = true;
  }

  return hadUnexpectedResult ? OptFailure : OptSuccess;
}

int main(int argc, char **argv) {
  llvm::PrettyStackTraceProgram x(argc, argv);
  InitLLVM y(argc, argv);

  cl::ParseCommandLineOptions(argc, argv, "MLIR modular optimizer driver\n");

  // Set up the input file.
  auto fileOrErr = MemoryBuffer::getFileOrSTDIN(inputFilename);
  if (std::error_code error = fileOrErr.getError()) {
    llvm::errs() << argv[0] << ": could not open input file '" << inputFilename
                 << "': " << error.message() << "\n";
    return 1;
  }

  // The split-input-file mode is a very specific mode that slices the file
  // up into small pieces and checks each independently.
  if (splitInputFile)
    return splitAndProcessFile(std::move(*fileOrErr));

  return processFile(std::move(*fileOrErr));
}