3655069234
This commit moves FuncOp out of the builtin dialect, and into the Func dialect. This move has been planned in some capacity from the moment we made FuncOp an operation (years ago). This commit handles the functional aspects of the move, but various aspects are left untouched to ease migration: func::FuncOp is re-exported into mlir to reduce the actual API churn, the assembly format still accepts the unqualified `func`. These temporary measures will remain for a little while to simplify migration before being removed. Differential Revision: https://reviews.llvm.org/D121266
122 lines
4.5 KiB
Python
122 lines
4.5 KiB
Python
"""This file contains benchmarks for sparse tensors. In particular, it
|
|
contains benchmarks for both mlir sparse tensor dialect and numpy so that they
|
|
can be compared against each other.
|
|
"""
|
|
import ctypes
|
|
import numpy as np
|
|
import os
|
|
import re
|
|
import time
|
|
|
|
from mlir import ir
|
|
from mlir import runtime as rt
|
|
from mlir.dialects import builtin
|
|
from mlir.dialects.linalg.opdsl import lang as dsl
|
|
from mlir.execution_engine import ExecutionEngine
|
|
|
|
from common import create_sparse_np_tensor
|
|
from common import emit_timer_func
|
|
from common import emit_benchmark_wrapped_main_func
|
|
from common import get_kernel_func_from_module
|
|
from common import setup_passes
|
|
|
|
|
|
# Matrix multiplication expressed with the linalg OpDSL. The operand
# definitions below use the symbolic sizes M, K and N: A is (M, K), B is
# (K, N) and the output C is (M, N).
@dsl.linalg_structured_op
def matmul_dsl(
    # Left-hand operand, indexed by (M, K).
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    # Right-hand operand, indexed by (K, N).
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    # Result operand, indexed by (M, N); `output=True` marks it as the output.
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)
):
    """Helper function for mlir sparse matrix multiplication benchmark."""
    # C[m, n] accumulates A[m, k] * B[k, n]; k appears only on the right-hand
    # side of the update.
    C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
|
|
|
|
|
|
def benchmark_sparse_mlir_multiplication():
    """Benchmark for mlir sparse matrix multiplication.

    Because it's an MLIR benchmark we need to return both a `compiler`
    function and a `runner` function.

    Returns:
        A `(compiler, runner)` pair. `compiler()` assembles the benchmark
        module, builds an `ExecutionEngine` for it and returns the engine's
        `invoke` method. `runner(engine_invoke)` calls the compiled `main`
        entry point and returns the first element of the timer buffer as an
        `int` (the buffer is named `np_timers_ns`, so presumably
        nanoseconds).

    Raises:
        AssertionError: from `compiler()` when the `MLIR_C_RUNNER_UTILS` or
            `MLIR_RUNNER_UTILS` environment variable does not name an
            existing path.
    """
    # `func.FuncOp` is used below, but the file-level imports only bring in
    # `builtin` from `mlir.dialects`. Import the Func dialect here so the
    # name resolves instead of raising NameError.
    from mlir.dialects import func

    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        param1_type = ir.RankedTensorType.get([1000, 1500], f64)
        param2_type = ir.RankedTensorType.get([1500, 2000], f64)
        result_type = ir.RankedTensorType.get([1000, 2000], f64)
        with ir.InsertionPoint(module.body):
            @func.FuncOp.from_py_func(param1_type, param2_type, result_type)
            def sparse_kernel(x, y, z):
                return matmul_dsl(x, y, outs=[z])

    def compiler():
        """Build the benchmark module and return the engine's `invoke`."""
        with ir.Context(), ir.Location.unknown():
            kernel_func = get_kernel_func_from_module(module)
            timer_func = emit_timer_func()
            wrapped_func = emit_benchmark_wrapped_main_func(
                kernel_func,
                timer_func
            )
            # The three functions are combined through their textual form
            # and re-parsed as a single module.
            main_module_with_benchmark = ir.Module.parse(
                str(timer_func) + str(wrapped_func) + str(kernel_func)
            )
            setup_passes(main_module_with_benchmark)
            c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")
            assert os.path.exists(c_runner_utils),\
                f"{c_runner_utils} does not exist." \
                f" Please pass a valid value for" \
                f" MLIR_C_RUNNER_UTILS environment variable."
            runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")
            assert os.path.exists(runner_utils),\
                f"{runner_utils} does not exist." \
                f" Please pass a valid value for MLIR_RUNNER_UTILS" \
                f" environment variable."

            # NOTE(review): the positional `3` is presumably the optimization
            # level -- confirm against the ExecutionEngine signature.
            engine = ExecutionEngine(
                main_module_with_benchmark,
                3,
                shared_libs=[c_runner_utils, runner_utils]
            )
            return engine.invoke

    def runner(engine_invoke):
        """Invoke the compiled `main` and return the recorded timer value."""
        def as_invoke_arg(array):
            # Every argument is handed over as a pointer to a pointer to the
            # array's ranked memref descriptor.
            return ctypes.pointer(
                ctypes.pointer(rt.get_ranked_memref_descriptor(array))
            )

        compiled_program_args = []
        # `result_type` is listed twice: once in front of the two parameter
        # types and once after them.
        for argument_type in [
            result_type, param1_type, param2_type, result_type
        ]:
            # Recover the dimensions from the printed type by stripping
            # "tensor", "<" and ">" and dropping the trailing element type.
            argument_type_str = str(argument_type)
            dimensions_str = re.sub("<|>|tensor", "", argument_type_str)
            dimensions = [int(dim) for dim in dimensions_str.split("x")[:-1]]
            if argument_type == result_type:
                argument = np.zeros(dimensions, np.float64)
            else:
                argument = create_sparse_np_tensor(dimensions, 1000)
            compiled_program_args.append(as_invoke_arg(argument))

        # One-element buffer passed as the last argument; its first element
        # is read back after the call.
        np_timers_ns = np.array([0], dtype=np.int64)
        compiled_program_args.append(as_invoke_arg(np_timers_ns))
        engine_invoke("main", *compiled_program_args)
        return int(np_timers_ns[0])

    return compiler, runner
|
|
|
|
|
|
def benchmark_np_matrix_multiplication():
    """Benchmark for numpy matrix multiplication.

    Because it's a python benchmark, we don't have any `compiler` function
    returned. We just return the `runner` function.

    Returns:
        A `(None, runner)` pair. `runner()` multiplies two freshly generated
        random matrices of shapes (1000, 1500) and (1500, 2000) with
        `np.matmul` and returns the elapsed time of that call in nanoseconds
        as an `int`.
    """
    def runner():
        # Inputs are generated before the clock starts, so only the matmul
        # itself is timed.
        argument1 = np.random.uniform(low=0.0, high=100.0, size=(1000, 1500))
        argument2 = np.random.uniform(low=0.0, high=100.0, size=(1500, 2000))
        # Use the monotonic performance counter rather than the wall clock,
        # so system clock adjustments cannot distort the measured interval.
        start_time = time.perf_counter_ns()
        np.matmul(argument1, argument2)
        return time.perf_counter_ns() - start_time

    return None, runner
|