llvm/mlir/benchmark/python/benchmark_sparse.py
River Riddle 3655069234 [mlir] Move the Builtin FuncOp to the Func dialect
This commit moves FuncOp out of the builtin dialect, and into the Func
dialect. This move has been planned in some capacity from the moment
we made FuncOp an operation (years ago). This commit handles the
functional aspects of the move, but various aspects are left untouched
to ease migration: func::FuncOp is re-exported into mlir to reduce
the actual API churn, the assembly format still accepts the unqualified
`func`. These temporary measures will remain for a little while to
simplify migration before being removed.

Differential Revision: https://reviews.llvm.org/D121266
2022-03-16 17:07:03 -07:00
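For the Python bindings, the practical effect of this commit is that `FuncOp` is now reached through the `func` dialect module instead of `builtin`, which is exactly what the benchmark file below does. The following is only a minimal illustrative sketch of the new spelling (the `identity` function here is made up for the example and is not part of the commit):

    from mlir import ir
    from mlir.dialects import func

    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        with ir.InsertionPoint(module.body):
            # FuncOp now lives in the func dialect bindings, not builtin.
            @func.FuncOp.from_py_func(f64)
            def identity(x):
                return x
        # The op should now round-trip with its qualified `func.func` name;
        # the unqualified `func` spelling is still accepted by the parser
        # during the migration window described above.
        print(module)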


"""This file contains benchmarks for sparse tensors. In particular, it
contains benchmarks for both mlir sparse tensor dialect and numpy so that they
can be compared against each other.
"""
import ctypes
import numpy as np
import os
import re
import time
from mlir import ir
from mlir import runtime as rt
from mlir.dialects import builtin
from mlir.dialects.linalg.opdsl import lang as dsl
from mlir.execution_engine import ExecutionEngine
from common import create_sparse_np_tensor
from common import emit_timer_func
from common import emit_benchmark_wrapped_main_func
from common import get_kernel_func_from_module
from common import setup_passes


@dsl.linalg_structured_op
def matmul_dsl(
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)
):
    """Helper function for the MLIR sparse matrix multiplication benchmark."""
    # Standard matmul: C[m, n] accumulates A[m, k] * B[k, n] over the
    # reduction dimension k.
    C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]


def benchmark_sparse_mlir_multiplication():
    """Benchmark for MLIR sparse matrix multiplication. Because it's an MLIR
    benchmark, we need to return both a `compiler` function and a `runner`
    function.
    """
    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        param1_type = ir.RankedTensorType.get([1000, 1500], f64)
        param2_type = ir.RankedTensorType.get([1500, 2000], f64)
        result_type = ir.RankedTensorType.get([1000, 2000], f64)
        with ir.InsertionPoint(module.body):

            @func.FuncOp.from_py_func(param1_type, param2_type, result_type)
            def sparse_kernel(x, y, z):
                return matmul_dsl(x, y, outs=[z])

    def compiler():
        with ir.Context(), ir.Location.unknown():
            # Stitch the kernel together with the timer and the benchmark
            # wrapper, then lower the resulting module.
            kernel_func = get_kernel_func_from_module(module)
            timer_func = emit_timer_func()
            wrapped_func = emit_benchmark_wrapped_main_func(
                kernel_func,
                timer_func
            )
            main_module_with_benchmark = ir.Module.parse(
                str(timer_func) + str(wrapped_func) + str(kernel_func)
            )
            setup_passes(main_module_with_benchmark)

            c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")
            assert os.path.exists(c_runner_utils), \
                f"{c_runner_utils} does not exist." \
                f" Please pass a valid value for the" \
                f" MLIR_C_RUNNER_UTILS environment variable."
            runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")
            assert os.path.exists(runner_utils), \
                f"{runner_utils} does not exist." \
                f" Please pass a valid value for the MLIR_RUNNER_UTILS" \
                f" environment variable."

            # JIT-compile the lowered module; the returned `invoke` is what
            # the runner calls.
            engine = ExecutionEngine(
                main_module_with_benchmark,
                3,
                shared_libs=[c_runner_utils, runner_utils]
            )
            return engine.invoke

    def runner(engine_invoke):
        compiled_program_args = []
        for argument_type in [
            result_type, param1_type, param2_type, result_type
        ]:
            # Recover the static dimensions from the tensor type, e.g.
            # "tensor<1000x1500xf64>" -> [1000, 1500].
            argument_type_str = str(argument_type)
            dimensions_str = re.sub("<|>|tensor", "", argument_type_str)
            dimensions = [int(dim) for dim in dimensions_str.split("x")[:-1]]
            if argument_type == result_type:
                argument = np.zeros(dimensions, np.float64)
            else:
                argument = create_sparse_np_tensor(dimensions, 1000)
            # The compiled entry point takes pointers to ranked memref
            # descriptors.
            compiled_program_args.append(
                ctypes.pointer(
                    ctypes.pointer(rt.get_ranked_memref_descriptor(argument))
                )
            )
        # The final argument receives the measured time in nanoseconds.
        np_timers_ns = np.array([0], dtype=np.int64)
        compiled_program_args.append(
            ctypes.pointer(
                ctypes.pointer(rt.get_ranked_memref_descriptor(np_timers_ns))
            )
        )
        engine_invoke("main", *compiled_program_args)
        return int(np_timers_ns[0])

    return compiler, runner


def benchmark_np_matrix_multiplication():
    """Benchmark for numpy matrix multiplication. Because it's a Python
    benchmark, we don't have any `compiler` function returned. We just return
    the `runner` function.
    """
    def runner():
        argument1 = np.random.uniform(low=0.0, high=100.0, size=(1000, 1500))
        argument2 = np.random.uniform(low=0.0, high=100.0, size=(1500, 2000))
        start_time = time.time_ns()
        np.matmul(argument1, argument2)
        return time.time_ns() - start_time

    return None, runner
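
The two benchmark functions above follow the small protocol spelled out in their docstrings: each returns a `(compiler, runner)` pair, with `compiler` set to `None` for pure-Python benchmarks. The driver below is only an illustrative sketch of that protocol, not the harness that actually ships with these benchmarks; `run_benchmark` and its `repetitions` parameter are hypothetical names.

    # Hypothetical driver, shown only to illustrate how the (compiler, runner)
    # pairs returned above could be consumed.
    def run_benchmark(benchmark, repetitions=5):
        compiler, runner = benchmark()
        # MLIR benchmarks compile once and reuse the engine's invoke handle.
        engine_invoke = compiler() if compiler is not None else None
        timings_ns = []
        for _ in range(repetitions):
            if engine_invoke is not None:
                # MLIR runners take the invoker and return elapsed nanoseconds.
                timings_ns.append(runner(engine_invoke))
            else:
                # Pure numpy runners time themselves.
                timings_ns.append(runner())
        return timings_ns


    if __name__ == "__main__":
        print(run_benchmark(benchmark_sparse_mlir_multiplication))
        print(run_benchmark(benchmark_np_matrix_multiplication))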