Source code for mindspore_lite.model

# Copyright 2022 - 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Model API.
"""

from __future__ import absolute_import

__all__ = [
    "ModelType",
    "Model",
    "ModelParallelRunner",
    "ModelGroup",
    "MultiModelRunner",
    "ModelExecutor",
]
import functools
import logging
import os
from enum import Enum

import numpy as np

from mindspore_lite._checkparam import check_isinstance
from mindspore_lite.context import Context
from mindspore_lite.lib import _c_lite_wrapper
from mindspore_lite.tensor import Tensor, TensorMeta
from mindspore_lite.base_model import BaseModel
from mindspore_lite._parse_update_weights_name import (
    _parse_update_weight_config_name,
    _rename_variable_weight,
)



[docs]
class ModelType(Enum):
    """
    The `ModelType` class defines the type of the model exported or imported in MindSpore Lite.

    Used in the following scenarios:

    1. When using `mindspore_lite.Converter`, set `save_type` parameter, `ModelType` used to define the model type
    generated by Converter. ``ModelType.MINDIR`` is recommended.

    2. After using `mindspore_lite.Converter`, when loading or building a model from file for predicting, the
    `ModelType` is used to define Input model framework type. Only support ``ModelType.MINDIR``.

    Currently, the following `ModelType` are supported:

    ===========================  =======================================================================
    Definition                   Description
    ===========================  =======================================================================
    `ModelType.MINDIR`           MindSpore model's framework type, which model uses .mindir as suffix.
    `ModelType.MINDIR_LITE`      MindSpore Lite model's framework type, which model uses .ms as suffix.
    ===========================  =======================================================================

    Examples:
        >>> # Method 1: Import mindspore_lite package
        >>> import mindspore_lite as mslite
        >>> print(mslite.ModelType.MINDIR)
        ModelType.MINDIR
        >>> # Method 2: from mindspore_lite package import ModelType
        >>> from mindspore_lite import ModelType
        >>> print(ModelType.MINDIR)
        ModelType.MINDIR
    """

    # The numeric values are not consecutive (0 and 4).
    # NOTE(review): presumably they mirror the C++ ModelType enum values - confirm before changing.
    MINDIR = 0
    MINDIR_LITE = 4



# Translation table from the Python-side `ModelType` members to the `ModelType` values of the C++ wrapper.
MODEL_TYPE_PY_CXX_MAP = dict(
    (
        (ModelType.MINDIR, _c_lite_wrapper.ModelType.kMindIR),
        (ModelType.MINDIR_LITE, _c_lite_wrapper.ModelType.kMindIR_Lite),
    )
)

# Reverse translation table (C++ wrapper value -> Python `ModelType`), derived from the table above so that
# the two directions cannot drift apart.
MODEL_TYPE_CXX_PY_MAP = {cxx_type: py_type for py_type, cxx_type in MODEL_TYPE_PY_CXX_MAP.items()}


def set_env(func):
    """
    Decorator that warns when the Ascend custom operator path is not configured.

    Every call of the decorated function first checks the ``ASCEND_CUSTOM_OPP_PATH`` environment variable and
    logs a warning when it is unset or empty. The environment itself is never modified, and the call is always
    forwarded to `func` unchanged.

    Args:
        func (Callable): The function to wrap.

    Returns:
        Callable, the wrapper. It keeps the name, docstring and other metadata of `func`.
    """

    # functools.wraps keeps __name__/__doc__/__wrapped__ of the decorated function, so that introspection and
    # generated API documentation still describe `func` instead of the generic `wrapper`.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if not os.getenv("ASCEND_CUSTOM_OPP_PATH"):
            logging.warning("Ascend custom operator path not found")

        return func(*args, **kwargs)

    return wrapper


def check_empty_string(arg_name, arg_value):
    """
    Reject an empty string argument.

    Args:
        arg_name (str): Name of the argument, used only in the error message.
        arg_value (Any): Value to check.

    Raises:
        ValueError: `arg_value` is equal to ``""``.
    """
    is_empty = arg_value == ""
    if not is_empty:
        return
    raise ValueError(f"{arg_name} must not be empty string!")



[docs]
class Model(BaseModel):
    """
    The `Model` class defines a MindSpore Lite's model, facilitating computational graph management.

    Examples:
        >>> import mindspore_lite as mslite
        >>> model = mslite.Model()
        >>> print(model)
        model_path: .
    """

    def __init__(self):
        """Create an unbuilt model that wraps a new ``ModelBind`` object of the C++ wrapper."""
        inner_model = _c_lite_wrapper.ModelBind()
        super().__init__(inner_model)
        # Ascend provider of the build context; filled in by build_from_file / build_from_buffer.
        self.provider = ""
        # Name mapping consulted by update_weights; filled in by _apply_config.
        self.lora_name_map = {}
        # Path of the model file; "" until build_from_file is called (build_from_buffer sets it to None).
        self.model_path_ = ""

    def __str__(self):
        """Return the text ``model_path: <path>.`` describing where the model was loaded from."""
        return f"model_path: {self.model_path_}."

    # generate api by del decorator set_env.

[docs]
    def build_from_file(
            self,
            model_path,
            model_type,
            context=None,
            config_path="",
            config_dict: dict = None,
    ):
        """
        Load and build a model from file.

        Args:
            model_path (str): Path of the input model when build from file. For example, "/home/user/model.mindir".
                Model should use .mindir as suffix.
            model_type (ModelType): Define The type of input model file. Option is ``ModelType.MINDIR``.
                For details, see
                `ModelType <https://mindspore.cn/lite/api/en/master/mindspore_lite/mindspore_lite.ModelType.html>`_ .
            context (Context, optional): Define the context used to transfer options during execution.
                Default: ``None``. ``None`` means the Context with cpu target.
            config_path (str, optional): Define the config file path. the config file is used to transfer user defined
                options during build model. In the following scenarios, users may need to set the parameter.
                For example, "/home/user/config.txt". Default: ``""``.

                Set mixed precision inference. The content and description of the configuration file are as follows:

                .. code-block::

                    [execution_plan]
                    [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16)
                    [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32)

            config_dict (dict, optional): When you set config in this dict, the priority is higher than the
                configuration items in config_path. Default: ``None``.

                Set rank table file for inference. The content of the configuration file is as follows:

                .. code-block::

                    [ascend_context]
                    rank_table_file=[path_a](storage initial path of the rank table file)

                When set

                .. code-block::

                    config_dict = {"ascend_context" : {"rank_table_file" : "path_b"}}

                The path_b from the config_dict will be used to compile the model.

        Raises:
            TypeError: `model_path` is not a str.
            TypeError: `model_type` is not a ModelType.
            TypeError: `context` is neither a Context nor ``None``.
            TypeError: `config_path` is not a str.
            RuntimeError: `model_path` does not exist.
            RuntimeError: `config_path` does not exist.
            RuntimeError: load configuration by `config_path` failed.
            RuntimeError: build from file failed.

        Examples:
            >>> # Testcase 1: build from file with default cpu context.
            >>> import mindspore_lite as mslite
            >>> model = mslite.Model()
            >>> model.build_from_file("mobilenetv2.mindir", mslite.ModelType.MINDIR)
            >>> print(model)
            model_path: mobilenetv2.mindir.
            >>> # Testcase 2: build from file with an explicitly configured cpu context.
            >>> import mindspore_lite as mslite
            >>> model = mslite.Model()
            >>> context = mslite.Context()
            >>> context.target = ["cpu"]
            >>> model.build_from_file("mobilenetv2.mindir", mslite.ModelType.MINDIR, context)
            >>> print(model)
            model_path: mobilenetv2.mindir.
        """
        # Argument validation, in the same order as the parameters.
        check_isinstance("model_path", model_path, str)
        check_isinstance("model_type", model_type, ModelType)
        context = Context() if context is None else context
        check_isinstance("context", context, Context)
        check_isinstance("config_path", config_path, str)

        # update_weights later needs to know which Ascend provider the model was built for.
        self.provider = context.ascend.provider

        model_file_found = os.path.exists(model_path)
        if not model_file_found:
            raise RuntimeError("build_from_file failed, model_path does not exist!")
        self.model_path_ = model_path

        # Every model type other than MINDIR is handed to the wrapper as kMindIR_Lite.
        cxx_model_type = (
            _c_lite_wrapper.ModelType.kMindIR
            if model_type is ModelType.MINDIR
            else _c_lite_wrapper.ModelType.kMindIR_Lite
        )

        # The configuration has to be applied to the inner model before it is built.
        self._apply_config(config_path, config_dict)
        build_status = self._model.build_from_file(self.model_path_, cxx_model_type, context._inner_context)
        if build_status.IsOk():
            return
        raise RuntimeError(f"build_from_file failed! Error is {build_status.ToString()}")


    # generate api by del decorator set_env.

[docs]
    def build_from_buffer(
            self,
            model_bytes,
            weight_bytes=None,
            model_type=None,
            context=None,
            config_path="",
            config_dict: dict = None,
    ):
        """
        Load and build a model from buffer.

        Args:
            model_bytes (Bytes): Bytes of the mindir model when build from buffer.
            weight_bytes (Bytes, optional): Bytes of the separate weight when build from buffer. Default: ``None``.
            model_type (ModelType, optional): Define The type of input model file. Option is ``ModelType.MINDIR``.
                Default: ``None``. ``None`` means ``ModelType.MINDIR``. For details, see
                `ModelType <https://mindspore.cn/lite/api/en/master/mindspore_lite/mindspore_lite.ModelType.html>`_ .
            context (Context, optional): Define the context used to transfer options during execution.
                Default: ``None``. ``None`` means the Context with cpu target.
            config_path (str, optional): Define the config file path. the config file is used to transfer user defined
                options during build model. In the following scenarios, users may need to set the parameter.
                For example, "/home/user/config.txt". Default: ``""``.

                Set mixed precision inference. The content and description of the configuration
                file are as follows:

                .. code-block::

                    [execution_plan]
                    [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16)
                    [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32)

            config_dict (dict, optional): When you set config in this dict, the priority is higher than the
                configuration items in config_path. Default: ``None``.

                Set rank table file for inference. The content of the configuration file is as follows:

                .. code-block::

                    [ascend_context]
                    rank_table_file=[path_a](storage initial path of the rank table file)

                When set

                .. code-block::

                    config_dict = {"ascend_context" : {"rank_table_file" : "path_b"}}

                The path_b from the config_dict will be used to compile the model.

        Raises:
            TypeError: `model_bytes` is not a Bytes.
            TypeError: `weight_bytes` is neither a Bytes nor ``None``.
            TypeError: `model_type` is neither a ModelType nor ``None``.
            TypeError: `context` is neither a Context nor ``None``.
            TypeError: `config_path` is not a str.
            RuntimeError: Length of `model_bytes` is 0.
            RuntimeError: Value of `model_type` is not ``ModelType.MINDIR``.
            RuntimeError: `config_path` does not exist.
            RuntimeError: Failed to load the configuration file from `config_path`.
            RuntimeError: Failed to load and build the model from the buffer.

        Examples:
            >>> # Testcase 1: build from buffer with a single file mindir model.
            >>> import mindspore_lite as mslite
            >>> with open("mobilenetv2.mindir", "rb") as f:
            ...     model_bytes = f.read()
            ...
            >>> model = mslite.Model()
            >>> model.build_from_buffer(model_bytes, None, mslite.ModelType.MINDIR)
            >>> print(model)
            model_path: None.
            >>> # Testcase 2: build from buffer with a separated weight model.
            >>> import mindspore_lite as mslite
            >>> with open("sd1.5_unet.onnx_graph.mindir", "rb") as f:
            ...     model_bytes = f.read()
            ...
            >>> with open("sd1.5_unet.onnx_variables/data_0", "rb") as f:
            ...     weight_bytes = f.read()
            ...
            >>> model = mslite.Model()
            >>> context = mslite.Context()
            >>> context.target = ["ascend"]
            >>> model.build_from_buffer(model_bytes, weight_bytes, mslite.ModelType.MINDIR, context)
            >>> print(model)
            model_path: None.
        """
        check_isinstance("model_bytes", model_bytes, bytes)
        if len(model_bytes) == 0:
            raise RuntimeError("build_from_buffer failed, model_bytes is empty.")

        check_isinstance("weight_bytes", weight_bytes, bytes, enable_none=True)
        # The signature advertises None as the default, and MINDIR is the only type accepted below, so an
        # omitted model_type can only mean MINDIR. Without this, every call relying on the default failed.
        if model_type is None:
            model_type = ModelType.MINDIR
        check_isinstance("model_type", model_type, ModelType)
        if model_type != ModelType.MINDIR:
            raise RuntimeError("build_from_buffer failed, model_type should be MINDIR")

        model_type_ = _c_lite_wrapper.ModelType.kMindIR

        if context is None:
            context = Context()
        check_isinstance("context", context, Context)
        check_isinstance("config_path", config_path, str)

        # update_weights later needs to know which Ascend provider the model was built for.
        self.provider = context.ascend.provider

        # A model built from memory has no file path; __str__ therefore reports "model_path: None.".
        self.model_path_ = None

        # The configuration has to be applied to the inner model before it is built.
        self._apply_config(config_path, config_dict)

        ret = self._model.build_from_buff(model_bytes, weight_bytes, model_type_, context._inner_context)
        if not ret.IsOk():
            raise RuntimeError(f"build_from_buffer failed! Error is {ret.ToString()}")


    def _apply_config(self, config_path, config_dict):
        """
        Apply the configuration file and the configuration dict to the inner model before it is built.

        When `config_path` is not empty, the file is loaded into the inner model and additionally parsed by
        `_parse_update_weight_config_name`. If that parser returns at least two items, the first one is stored
        under ``config_dict["ascend_context"]["variable_weights_list"]`` and the second one becomes
        `self.lora_name_map`. Afterwards every section of `config_dict` is validated and passed to the inner
        model.

        The dict passed by the caller is never modified; the merge is done on a copy.

        Args:
            config_path (str): Path of the configuration file. An empty string means "no file".
            config_dict (dict[str, dict[str, str]] or None): Configuration sections keyed by section name.

        Raises:
            RuntimeError: `config_path` does not exist.
            RuntimeError: loading the configuration file failed.
            RuntimeError: updating a configuration section failed.
            TypeError: `config_dict` does not have the ``dict[str, dict[str, str]]`` layout (raised by
                `check_isinstance`).
        """
        if config_path:
            if not os.path.exists(config_path):
                # NOTE(review): this helper is also used by build_from_buffer, so the "build_from_file" prefix
                # can be misleading there. The text is kept unchanged because callers may match on it.
                raise RuntimeError("build_from_file failed, config_path does not exist!")
            ret = self._model.load_config(config_path)
            if not ret.IsOk():
                raise RuntimeError(f"load configuration failed! Error is {ret.ToString()}")
            parse_res = _parse_update_weight_config_name(config_path)
            if parse_res is not None and len(parse_res) >= 2:
                update_names, self.lora_name_map = parse_res[0], parse_res[1]
                if config_dict is None:
                    config_dict = {}
                check_isinstance("config_dict", config_dict, dict)
                # The caller may have passed a dict without an "ascend_context" section; plain indexing used to
                # fail with KeyError in that case.
                ascend_section = config_dict.get("ascend_context", {})
                check_isinstance("config_dict_value", ascend_section, dict)
                # Build new dicts instead of writing into the caller's objects.
                config_dict = {
                    **config_dict,
                    "ascend_context": {**ascend_section, "variable_weights_list": update_names},
                }

        if config_dict:
            check_isinstance("config_dict", config_dict, dict)
            # First pass: validate the complete dict, so that nothing is applied when any entry is malformed.
            for k, v in config_dict.items():
                check_isinstance("config_dict_key", k, str)
                check_isinstance("config_dict_value", v, dict)
                for v_k, v_v in v.items():
                    check_isinstance("config_dict_value_key", v_k, str)
                    check_isinstance("config_dict_value_value", v_v, str)
            # Second pass: hand every section to the inner model.
            for key, value in config_dict.items():
                ret = self._model.update_config(key, value)
                if not ret.IsOk():
                    raise RuntimeError(f"update configuration failed! Error is {ret.ToString()}.")


[docs]
    def get_outputs(self):
        """
        Obtains all output information Tensors of the model.

        This method only delegates to the `get_outputs` implementation of the base class.

        Returns:
            list[TensorMeta], the output TensorMeta list of the model.
        """
        # The override is kept although it only delegates.
        # NOTE(review): presumably so the method is documented on `Model` itself - confirm.
        # pylint: disable=useless-super-delegation
        return super().get_outputs()



[docs]
    def get_inputs(self):
        """
        Obtains all input Tensors of the model.

        This method only delegates to the `get_inputs` implementation of the base class.

        Returns:
            list[Tensor], the input Tensor list of the model.

        Examples:
            >>> import mindspore_lite as mslite
            >>> model = mslite.Model()
            >>> model.build_from_file("mobilenetv2.mindir", mslite.ModelType.MINDIR)
            >>> inputs = model.get_inputs()
        """
        # The override is kept although it only delegates.
        # NOTE(review): presumably so the method is documented on `Model` itself - confirm.
        # pylint: disable=useless-super-delegation
        return super().get_inputs()



[docs]
    def update_weights(self, weights):
        """
        Update constant weight of the model node.

        When the model was built with the Ascend provider ``"ge"``, the `name` of every passed Tensor is rewritten
        in place before the update: names found in the mapping collected from the config file are replaced by
        their mapped name, all other names are converted by `_rename_variable_weight`. Renaming only starts after
        the whole `weights` structure has been validated, so a rejected call leaves all Tensors untouched.

        Args:
            weights (list[list[Tensor]]): A list that includes all update weight Tensors.

        Returns:
            The result of the `update_weights` implementation of the base class.

        Raises:
            TypeError: `weights` is not a list.
            TypeError: `weights` is a list, but the elements are not list.
            TypeError: `weights` is a list of lists, but the innermost elements are not Tensor.
            RuntimeError: update weight failed.

        Tutorial Examples:
            - `Dynamic Weight Update
              <https://www.mindspore.cn/lite/cloud_docs/en/master/mindir/runtime_python.html#update-weights>`_
        """
        if not isinstance(weights, list):
            raise TypeError(f"weights must be list, but got {type(weights)}.")
        # Pass 1: validation only. No tensor is modified until the complete input is known to be well-formed.
        for i, weight in enumerate(weights):
            if not isinstance(weight, list):
                raise TypeError(f"weight must be list, but got {type(weight)}.")
            for j, tensor in enumerate(weight):
                if not isinstance(tensor, Tensor):
                    # Both indices are bracketed; the previous "{i}{j}" form could not tell (1, 12) from (11, 2).
                    raise TypeError(f"weights element must be Tensor, but got {type(tensor)} at index [{i}][{j}].")
        # Pass 2: renaming is only needed for the "ge" provider.
        if self.provider == "ge":
            for weight in weights:
                for tensor in weight:
                    if tensor.name in self.lora_name_map:
                        tensor.name = self.lora_name_map[tensor.name]
                    else:
                        tensor.name = _rename_variable_weight(tensor.name)
        return super().update_weights(weights)



[docs]
    def predict(self, inputs, outputs=None):
        """
        Inference model.

        Args:
            inputs (Union[list, tuple]): All inputs of the model in order. Every element is either a Tensor or a
                numpy.ndarray. A numpy.ndarray is written into the model's own input Tensor at the same position
                (via `set_data_from_numpy`), and that Tensor is used for the inference.
            outputs (list[Tensor], optional): A list that includes all output Tensors in order,
                this tensor include output data buffer. Default: ``None``.

        Returns:
            list[Tensor], the output Tensor list of the model.

        Raises:
            TypeError: `inputs` is neither a list nor a tuple.
            TypeError: an element of `inputs` is neither a Tensor nor a numpy.ndarray.
            TypeError: `outputs` is not a list.
            TypeError: `outputs` is a list, but the elements are not Tensor.
            RuntimeError: the number of `inputs` differs from the number of model inputs.
            RuntimeError: predict model failed.

        Examples:
            >>> # 1. predict which indata is from file
            >>> import mindspore_lite as mslite
            >>> import numpy as np
            >>> model = mslite.Model()
            >>> #default context's target is cpu
            >>> model.build_from_file("mobilenetv2.mindir", mslite.ModelType.MINDIR)
            >>> inputs = model.get_inputs()
            >>> in_data = np.fromfile("input.bin", dtype=np.float32)
            >>> inputs[0].set_data_from_numpy(in_data)
            >>> outputs = model.predict(inputs)
            >>> for output in outputs:
            ...     data = output.get_data_to_numpy()
            ...     print("outputs' shape: ", data.shape)
            ...
            outputs' shape:  (1, 1001)
            >>> # 2. predict which indata is numpy array
            >>> import mindspore_lite as mslite
            >>> import numpy as np
            >>> model = mslite.Model()
            >>> model.build_from_file("mobilenetv2.mindir", mslite.ModelType.MINDIR)
            >>> inputs = model.get_inputs()
            >>> for input in inputs:
            ...     in_data = np.arange(1 * 3 * 224 * 224, dtype=np.float32).reshape((1, 3, 224, 224))
            ...     input.set_data_from_numpy(in_data)
            ...
            >>> outputs = model.predict(inputs)
            >>> for output in outputs:
            ...     data = output.get_data_to_numpy()
            ...     print("outputs' shape: ", data.shape)
            ...
            outputs' shape:  (1, 1001)
            >>> # 3. predict which indata is from new MindSpore Lite's Tensor with numpy array
            >>> import mindspore_lite as mslite
            >>> import numpy as np
            >>> model = mslite.Model()
            >>> model.build_from_file("mobilenetv2.mindir", mslite.ModelType.MINDIR)
            >>> inputs = model.get_inputs()
            >>> input_tensors = []
            >>> for input in inputs:
            ...     input_tensor = mslite.Tensor()
            ...     input_tensor.dtype = input.dtype
            ...     input_tensor.shape = input.shape
            ...     input_tensor.format = input.format
            ...     input_tensor.name = input.name
            ...     in_data = np.arange(1 * 3 * 224 * 224, dtype=np.float32).reshape((1, 3, 224, 224))
            ...     input_tensor.set_data_from_numpy(in_data)
            ...     input_tensors.append(input_tensor)
            ...
            >>> outputs = model.predict(input_tensors)
            >>> for output in outputs:
            ...     data = output.get_data_to_numpy()
            ...     print("outputs' shape: ", data.shape)
            ...
            outputs' shape:  (1, 1001)
        """
        if not isinstance(inputs, (list, tuple)):
            raise TypeError(f"inputs must be list or tuple, but got {type(inputs)}.")
        # The model's own input tensors are the destination for numpy inputs and define the expected count.
        model_input_tensors = self.get_inputs()
        if len(model_input_tensors) != len(inputs):
            raise RuntimeError(f"model input len:{len(model_input_tensors)} not equal input len:{len(inputs)}!")
        inputs_tensor = []
        for i, in_tensor in enumerate(inputs):
            if isinstance(in_tensor, np.ndarray):
                # Copy the array into the model input at the same position and predict with that tensor.
                model_input_tensors[i].set_data_from_numpy(in_tensor)
                inputs_tensor.append(model_input_tensors[i])
            elif isinstance(in_tensor, Tensor):
                inputs_tensor.append(in_tensor)
            else:
                raise TypeError(
                    f"inputs element must be Tensor or numpy.ndarray, but got {type(in_tensor)} at index {i}."
                )
        return super().predict(inputs_tensor, outputs)



[docs]
    def resize(self, inputs, dims):
        """
        Resizes the shapes of inputs. This method is used in the following scenarios:

        1. If multiple inputs of the same size need to be predicted, you can set the batch dimension of `dims` to
           the number of inputs, then inference can be performed on multiple inputs at the same time.

        2. Adjust the input size to the specified shape.

        3. When the input is a dynamic shape (a dimension of the shape of the model input contains -1), -1 must be
           replaced by a fixed dimension through `resize` . (When `provider` is set to `ge`, model input shape is
           fixed by input Tensor shape.)

        4. The shape operator contained in the model is dynamic shape (a dimension of the shape operator contains -1).

        This method only delegates to the `resize` implementation of the base class and returns nothing.

        Args:
            inputs (list[Tensor]): A list that includes all input Tensors in order.
            dims (list[list[int]]): A list that includes the new shapes of input Tensors, should be consistent with
                input Tensors' shape.

        Raises:
            TypeError: `inputs` is not a list.
            TypeError: `inputs` is a list, but the elements are not Tensor.
            TypeError: `dims` is not a list.
            TypeError: `dims` is a list, but the elements are not list.
            TypeError: `dims` is a list, the elements are list, but the element's elements are not int.
            ValueError: The size of `inputs` is not equal to the size of `dims` .
            RuntimeError: resize inputs failed.

        Examples:
            >>> import mindspore_lite as mslite
            >>> model = mslite.Model()
            >>> model.build_from_file("mobilenetv2.mindir", mslite.ModelType.MINDIR)
            >>> inputs = model.get_inputs()
            >>> print("Before resize, the first input shape: ", inputs[0].shape)
            Before resize, the first input shape:  [1, 3, 224, 224]
            >>> model.resize(inputs, [[1, 3, 112, 112]])
            >>> print("After resize, the first input shape: ", inputs[0].shape)
            After resize, the first input shape:  [1, 3, 112, 112]
        """
        # The override is kept although it only delegates.
        # NOTE(review): presumably so the method is documented on `Model` itself - confirm.
        # pylint: disable=useless-super-delegation
        super().resize(inputs, dims)





[docs]
class ModelParallelRunner:
    """
    The `ModelParallelRunner` class defines a MindSpore Lite's Runner, which support model parallelism. Compared with
    `model` , `model` does not support parallelism, but `ModelParallelRunner` supports parallelism. A Runner contains
    multiple workers, which are the units that actually perform parallel inferring. The primary use case is when
    multiple clients send inference tasks to the server, the server perform parallel inference, shorten the inference
    time, and then return the inference results to the clients.

    Examples:
        >>> # Use case: serving inference.
        >>> # precondition 1: Building MindSpore Lite serving package by export MSLITE_ENABLE_CLOUD_INFERENCE=on.
        >>> # precondition 2: install wheel package of MindSpore Lite built by precondition 1.
        >>> import mindspore_lite as mslite
        >>> model_parallel_runner = mslite.ModelParallelRunner()
        >>> print(model_parallel_runner)
        model_path: .
    """

    def __init__(self):
        """
        Create an unbuilt runner that wraps a new ``ModelParallelRunnerBind`` object of the C++ wrapper.

        Raises:
            RuntimeError: the C++ wrapper was built without ``ModelParallelRunnerBind``.
        """
        if not hasattr(_c_lite_wrapper, "ModelParallelRunnerBind"):
            # The two literals are concatenated, so the first one must end with a space ("build MindSpore").
            raise RuntimeError(
                "ModelParallelRunner init failed, If you want to use it, you need to build "
                "MindSpore Lite serving package by export MSLITE_ENABLE_CLOUD_INFERENCE=on."
            )
        self._model = _c_lite_wrapper.ModelParallelRunnerBind()
        # Path of the model file; "" until build_from_file is called.
        self.model_path_ = ""

    def __str__(self):
        return f"model_path: {self.model_path_}."


[docs]
    def build_from_file(self, model_path, context=None):
        """
        Build a model parallel runner from model path so that it can run on a device.

        Args:
            model_path (str): Define the model path.
            context (Context, optional): Define the config used to transfer context and options during building model.
                Default: ``None``. ``None`` means the Context with cpu target. Context has the default parallel
                attribute.

        Raises:
            TypeError: `model_path` is not a str.
            TypeError: `context` is neither a Context nor ``None``.
            RuntimeError: `model_path` does not exist.
            RuntimeError: ModelParallelRunner's init failed.

        Examples:
            >>> # Use case: serving inference.
            >>> # precondition 1: Building MindSpore Lite serving package by export MSLITE_ENABLE_CLOUD_INFERENCE=on.
            >>> # precondition 2: install wheel package of MindSpore Lite built by precondition 1.
            >>> import mindspore_lite as mslite
            >>> context = mslite.Context()
            >>> context.target = ["cpu"]
            >>> context.parallel.workers_num = 4
            >>> model_parallel_runner = mslite.ModelParallelRunner()
            >>> model_parallel_runner.build_from_file(model_path="mobilenetv2.mindir", context=context)
            >>> print(model_parallel_runner)
            model_path: mobilenetv2.mindir.
        """
        check_isinstance("model_path", model_path, str)
        model_file_found = os.path.exists(model_path)
        if not model_file_found:
            raise RuntimeError("ModelParallelRunner's build from file failed, model_path does not exist!")
        self.model_path_ = model_path

        # Without a context the inner runner is initialised with None as its runner config.
        runner_config = None
        if context is not None:
            check_isinstance("context", context, Context)
            runner_config = context.parallel._runner_config

        init_status = self._model.init(self.model_path_, runner_config)
        if init_status.IsOk():
            return
        raise RuntimeError(f"ModelParallelRunner's build from file failed! Error is {init_status.ToString()}")



[docs]
    def get_inputs(self):
        """
        Obtains all input Tensors of the model.

        Returns:
            list[Tensor], the input Tensor list of the model.

        Examples:
            >>> # Use case: serving inference.
            >>> # precondition 1: Building MindSpore Lite serving package by export MSLITE_ENABLE_CLOUD_INFERENCE=on.
            >>> # precondition 2: install wheel package of MindSpore Lite built by precondition 1.
            >>> import mindspore_lite as mslite
            >>> context = mslite.Context()
            >>> context.target = ["cpu"]
            >>> context.parallel.workers_num = 4
            >>> model_parallel_runner = mslite.ModelParallelRunner()
            >>> model_parallel_runner.build_from_file(model_path="mobilenetv2.mindir", context=context)
            >>> inputs = model_parallel_runner.get_inputs()
        """
        inputs = []
        for _tensor in self._model.get_inputs():
            inputs.append(Tensor(_tensor))
        return inputs



[docs]
    def predict(self, inputs, outputs=None):
        """
        Inference ModelParallelRunner.

        Args:
            inputs (list[Tensor]): A list that includes all input Tensors in order.
            outputs (list[Tensor], optional): A list that includes all output Tensors in order,
                this tensor include output data buffer.

        Returns:
            list[Tensor], outputs, the model outputs are filled in the container in sequence.

        Raises:
            TypeError: `inputs` is not a list.
            TypeError: `inputs` is a list, but the elements are not Tensor.
            RuntimeError: predict model failed.

        Examples:
            >>> # Use case: serving inference.
            >>> # Precondition 1: Download MindSpore Lite serving package or building MindSpore Lite serving package by
            >>> #                 export MSLITE_ENABLE_CLOUD_INFERENCE=on.
            >>> # Precondition 2: Install wheel package of MindSpore Lite built by precondition 1.
            >>> # The result can be find in the tutorial of runtime_parallel_python.
            >>> import time
            >>> from threading import Thread
            >>> import numpy as np
            >>> import mindspore_lite as mslite
            >>> # the number of threads of one worker.
            >>> # WORKERS_NUM * THREAD_NUM should not exceed the number of cores of the machine.
            >>> THREAD_NUM = 1
            >>> # In parallel inference, the number of workers in one `ModelParallelRunner` in server.
            >>> # If you prepare to compare the time difference between parallel inference and serial inference,
            >>> # you can set WORKERS_NUM = 1 as serial inference.
            >>> WORKERS_NUM = 3
            >>> # Simulate 5 clients, and each client sends 2 inference tasks to the server at the same time.
            >>> PARALLEL_NUM = 5
            >>> TASK_NUM = 2
            >>>
            >>>
            >>> def parallel_runner_predict(parallel_runner, parallel_id):
            ...     # One Runner with 3 workers, set model input, execute inference and get output.
            ...     task_index = 0
            ...     while True:
            ...         if task_index == TASK_NUM:
            ...             break
            ...         task_index += 1
            ...         # Set model input
            ...         inputs = parallel_runner.get_inputs()
            ...         in_data = np.fromfile("input.bin", dtype=np.float32)
            ...         inputs[0].set_data_from_numpy(in_data)
            ...         once_start_time = time.time()
            ...         # Execute inference
            ...         outputs = parallel_runner.predict(inputs)
            ...         once_end_time = time.time()
            ...         print("parallel id: ", parallel_id, " | task index: ", task_index, " | run once time: ",
            ...               once_end_time - once_start_time, " s")
            ...         # Get output
            ...         for output in outputs:
            ...             tensor_name = output.name.rstrip()
            ...             data_size = output.data_size
            ...             element_num = output.element_num
            ...             print("tensor name is:%s tensor size is:%s tensor elements num is:%s" % (tensor_name,
            ...                                                                                      data_size,
            ...                                                                                      element_num))
            ...
            ...             data = output.get_data_to_numpy()
            ...             data = data.flatten()
            ...             print("output data is:", end=" ")
            ...             for j in range(5):
            ...                 print(data[j], end=" ")
            ...             print("")
            ...
            >>> # Init RunnerConfig and context, and add CPU device info
            >>> context = mslite.Context()
            >>> context.target = ["cpu"]
            >>> context.cpu.enable_fp16 = False
            >>> context.cpu.thread_num = THREAD_NUM
            >>> context.cpu.inter_op_parallel_num = THREAD_NUM
            >>> context.parallel.workers_num = WORKERS_NUM
            >>> # Build ModelParallelRunner from file
            >>> model_parallel_runner = mslite.ModelParallelRunner()
            >>> model_parallel_runner.build_from_file(model_path="mobilenetv2.mindir", context=context)
            >>> # The server creates 5 threads to store the inference tasks of 5 clients.
            >>> threads = []
            >>> total_start_time = time.time()
            >>> for i in range(PARALLEL_NUM):
            ...     threads.append(Thread(target=parallel_runner_predict, args=(model_parallel_runner, i,)))
            ...
            >>> # Start threads to perform parallel inference.
            >>> for th in threads:
            ...     th.start()
            ...
            >>> for th in threads:
            ...     th.join()
            ...
            >>> total_end_time = time.time()
            >>> print("total run time: ", total_end_time - total_start_time, " s")
        """
        if not isinstance(inputs, (list, tuple)):
            raise TypeError(f"inputs must be list or tuple, but got {type(inputs)}.")
        _inputs = []
        for i, element in enumerate(inputs):
            if not isinstance(element, Tensor):
                raise TypeError(f"inputs element must be Tensor, but got " f"{type(element)} at index {i}.")
            _inputs.append(element._tensor)
        _outputs = []
        if outputs is not None:
            if not isinstance(outputs, list):
                raise TypeError(f"outputs must be list, but got {type(outputs)}.")
            for i, element in enumerate(outputs):
                if not isinstance(element, Tensor):
                    raise TypeError(f"outputs element must be Tensor, but got {type(element)} at index {i}.")
                # pylint: disable=protected-access
                _outputs.append(element._tensor)

        _outputs = self._model.predict(_inputs, _outputs, None, None)
        if not isinstance(_outputs, list) or len(_outputs) == 0:
            raise RuntimeError("predict failed!")
        predict_outputs = []
        for _output in _outputs:
            predict_outputs.append(Tensor(_output))
        return predict_outputs





[docs]
class ModelGroupFlag(Enum):
    """
    The `ModelGroupFlag` class defines the type of the model group.

    The `ModelGroupFlag` is used to define the flags used to construct a `ModelGroup`. Currently, supports:

    1. `ModelGroupFlag.SHARE_WEIGHT`, multiple models share weights (including constants and variables) memory.
       Currently only supported in cloud side Ascend inference and the provider is GE.

    2. `ModelGroupFlag.SHARE_WORKSPACE`, multiple models share workspace memory. This is the default
       construction flag for `ModelGroup`.

    3. `ModelGroupFlag.SHARE_WEIGHT_WORKSPACE`, multiple models share both weight memory and workspace memory.

    Examples:
        >>> import mindspore_lite as mslite
        >>> context = mslite.Context()
        >>> context.target = ["Ascend"]
        >>> context.ascend.device_id = 0
        >>> context.ascend.rank_id = 0
        >>> context.ascend.provider = "ge"
        >>> model_group = mslite.ModelGroup(mslite.ModelGroupFlag.SHARE_WEIGHT)
        >>> model0 = mslite.Model()
        >>> model1 = mslite.Model()
        >>> model_group.add_model([model0, model1])
        >>> model0.build_from_file("seq_1024.mindir", mslite.ModelType.MINDIR, context, "config0.ini")
        >>> model1.build_from_file("seq_1.mindir", mslite.ModelType.MINDIR, context, "config.ini")
    """

    SHARE_WEIGHT = 1
    SHARE_WORKSPACE = 2
    SHARE_WEIGHT_WORKSPACE = 3



# Translation table from each Python-level `ModelGroupFlag` member to the
# matching `_c_lite_wrapper.ModelGroupFlag` value of the binding layer.
MODEL_GROUP_FLAG_PY_CXX_MAP = {
    ModelGroupFlag.SHARE_WEIGHT: _c_lite_wrapper.ModelGroupFlag.kShareWeight,
    ModelGroupFlag.SHARE_WORKSPACE: _c_lite_wrapper.ModelGroupFlag.kShareWorkspace,
    ModelGroupFlag.SHARE_WEIGHT_WORKSPACE: _c_lite_wrapper.ModelGroupFlag.kShareWeightAndWorkspace,
}



[docs]
class ModelGroup:
    """
    The `ModelGroup` class is used to define a MindSpore model group,
    facilitating multiple models to share workspace memory or weights
    (including constants and variables) memory or both.

    Args:
        flags (ModelGroupFlag, optional): Indicates the type of the model group.
            Default: ``ModelGroupFlag.SHARE_WORKSPACE``.

    Raises:
        RuntimeError: `flags` is not one of the `ModelGroupFlag` members.

    Examples:
        >>> # Multi models share workspace memory
        >>> import mindspore_lite as mslite
        >>> context = mslite.Context()
        >>> context.target = ["Ascend"]
        >>> context.ascend.device_id = 0
        >>> context.ascend.rank_id = 0
        >>> context.ascend.provider = "ge"
        >>> model_type = mslite.ModelType.MINDIR
        >>> path1 = "path_to_model1"
        >>> path2 = "path_to_model2"
        >>> model_group = mslite.ModelGroup()
        >>> model_group.add_model([path1, path2])
        >>> model_group.cal_max_size_of_workspace(model_type, context)
        >>>
        >>> # Multi models share weights memory
        >>> import mindspore_lite as mslite
        >>> context = mslite.Context()
        >>> context.target = ["Ascend"]
        >>> context.ascend.device_id = 0
        >>> context.ascend.rank_id = 0
        >>> context.ascend.provider = "ge"
        >>> model_group = mslite.ModelGroup(mslite.ModelGroupFlag.SHARE_WEIGHT)
        >>> model0 = mslite.Model()
        >>> model1 = mslite.Model()
        >>> model_group.add_model([model0, model1])
        >>> model0.build_from_file("seq_1024.mindir", mslite.ModelType.MINDIR, context, "config0.ini")
        >>> model1.build_from_file("seq_1.mindir", mslite.ModelType.MINDIR, context, "config.ini")
    """

    def __init__(self, flags=ModelGroupFlag.SHARE_WORKSPACE):
        # Reuse the module-level translation table rather than repeating the mapping here.
        # The isinstance guard keeps unhashable or foreign arguments on the RuntimeError path
        # instead of letting the dict lookup raise TypeError.
        flags_inner = MODEL_GROUP_FLAG_PY_CXX_MAP.get(flags) if isinstance(flags, ModelGroupFlag) else None
        if flags_inner is None:
            raise RuntimeError(
                "Parameter flags should be ModelGroupFlag.SHARE_WORKSPACE or "
                "ModelGroupFlag.SHARE_WEIGHT or ModelGroupFlag.SHARE_WEIGHT_WORKSPACE"
            )
        self._model_group = _c_lite_wrapper.ModelGroupBind(flags_inner)

    def add_model(self, models):
        """
        Used to define MindSpore Lite model grouping information, which is used to share workspace memory or
        weight (including constants and variables) memory. This interface only supports weight memory
        sharing when the `models` is a tuple or list of `Model` objects, and only supports workspace memory sharing in
        other scenarios.

        Args:
            models (union[tuple/list(str), tuple/list(Model)]): Define the list/tuple of model paths or Model objects.

        Raises:
            TypeError: `models` is not a list or tuple.
            TypeError: `models` is a list or tuple, but the elements are not all str or all Model.
            RuntimeError: `models` is empty.
            RuntimeError: Failed to add model grouping information.
        """
        if not isinstance(models, (list, tuple)):
            raise TypeError(f"models must be list/tuple, but got {type(models)}")
        if not models:
            raise RuntimeError("models cannot be empty")

        # The first element decides which flavour of grouping is requested; every
        # other element must then be of that same kind.
        first = models[0]
        if isinstance(first, str):
            expected_type = str
        elif isinstance(first, Model):
            expected_type = Model
        else:
            raise TypeError(f"models element must be all str or Model, but got {type(first)} at index 0.")
        for index, element in enumerate(models):
            if not isinstance(element, expected_type):
                raise TypeError(f"models element must be all str or Model, but got {type(element)} at index {index}.")

        if expected_type is str:
            ret = self._model_group.add_model(models)
        else:
            # pylint: disable=protected-access
            ret = self._model_group.add_model_by_object([model._model for model in models])
        if not ret.IsOk():
            raise RuntimeError("ModelGroup's add model failed.")

    def cal_max_size_of_workspace(self, model_type, context):
        """
        Calculate the max workspace of the added models. Only valid when the type of `ModelGroup` is
        ``ModelGroupFlag.SHARE_WORKSPACE``.

        Args:
            model_type (ModelType): Define the type of model file.
            context (Context): A context used to store options.

        Raises:
            TypeError: `model_type` is not a ModelType.
            TypeError: `context` is not a Context.
            RuntimeError: cal max size of workspace failed.
        """
        check_isinstance("context", context, Context)
        check_isinstance("model_type", model_type, ModelType)
        # Any model type other than MINDIR is handed to the binding layer as kMindIR_Lite.
        if model_type is ModelType.MINDIR:
            model_type_ = _c_lite_wrapper.ModelType.kMindIR
        else:
            model_type_ = _c_lite_wrapper.ModelType.kMindIR_Lite
        # pylint: disable=protected-access
        ret = self._model_group.cal_max_size_of_workspace(model_type_, context._inner_context)
        if not ret.IsOk():
            raise RuntimeError("ModelGroup's cal max size of workspace failed.")





[docs]
class MultiModelRunner:
    """
    The `MultiModelRunner` class is used to create mindir with multiple Models
    and provides a way to schedule multiple models.

    Examples:
        >>> import mindspore_lite as mslite
        >>> import numpy as np
        >>> dtype_map = {
        ...     mslite.DataType.FLOAT32: np.float32,
        ...     mslite.DataType.INT32: np.int32,
        ...     mslite.DataType.FLOAT16: np.float16,
        ...     mslite.DataType.INT8: np.int8,
        ... }
        >>> model_type = mslite.ModelType.MINDIR
        >>> context = mslite.Context()
        >>> context.ascend.device_id = 2
        >>> model_path = "path_to_model1"
        >>> model_runner = mslite.MultiModelRunner()
        >>> model_runner.build_from_file(model_path, model_type, context)
        >>> execs = model_runner.get_model_executor()
        >>> for exec_ in execs:
        ...     exec_inputs = exec_.get_inputs()
        ...     exec_outputs = exec_.get_outputs()
        ...     for input_tensor in exec_inputs:
        ...         data = np.random.randn(*input_tensor.shape).astype(dtype_map[input_tensor.dtype])
        ...         input_tensor.set_data_from_numpy(data)
        ...     exec_.predict(exec_inputs)
    """

    def __init__(self):
        self._runner = _c_lite_wrapper.MultiModelRunnerBind()

    def build_from_file(
            self,
            model_path,
            model_type,
            context=None,
            config_path="",
            config_dict: dict = None,
    ):
        """
        Load and build a runner from file.

        Args:
            model_path (str): Path of the input model when build from file. For example, "/home/user/model.mindir".
                Model should use .mindir as suffix.
            model_type (ModelType): Define The type of input model file. Option is ``ModelType.MINDIR``.
                For details, see
                `ModelType <https://mindspore.cn/lite/api/en/master/mindspore_lite/mindspore_lite.ModelType.html>`_ .
            context (Context, optional): Define the context used to transfer options during execution.
                Default: ``None``. ``None`` means the Context with cpu target.
            config_path (str, optional): Define the config file path. the config file is used to transfer user defined
                options during build model. In the following scenarios, users may need to set the parameter.
                For example, "/home/user/config.txt". Default: ``""``.

                Set mixed precision inference. The content and description of the configuration file are as
                follows:

                .. code-block::

                    [execution_plan]
                    [op_name1]=data_Type: float16 (The operator named op_name1 sets the data type as float16)
                    [op_name2]=data_Type: float32 (The operator named op_name2 sets the data type as float32)

            config_dict (dict, optional): When you set config in this dict, the priority is higher than the
                configuration items in config_path. Expected layout is ``{section(str): {key(str): value(str)}}``.
                Default: ``None``.

                Set rank table file for inference. The content of the configuration file is as follows:

                .. code-block::

                    [ascend_context]
                    rank_table_file=[path_a](storage initial path of the rank table file)

                When set

                .. code-block::

                    config_dict = {"ascend_context" : {"rank_table_file" : "path_b"}}

                Then the path_b from the config_dict will be used to compile the model.

        Raises:
            TypeError: `model_path` is not str.
            TypeError: `model_type` is not ModelType.
            TypeError: `context` is not Context or ``None`` .
            TypeError: `config_path` is not str.
            TypeError: `config_dict` is not a dict, or its layout is not ``{str: {str: str}}``.
            RuntimeError: `model_path` file path not exist.
            RuntimeError: `model_type` is not ``ModelType.MINDIR``.
            RuntimeError: `config_path` file path not exist.
            RuntimeError: load `config_path` failed.
            RuntimeError: update configuration from `config_dict` failed.
            RuntimeError: load and build MultiModelRunner failed.
        """
        check_isinstance("model_path", model_path, str)
        check_isinstance("model_type", model_type, ModelType)
        if context is None:
            context = Context()
        check_isinstance("context", context, Context)
        check_isinstance("config_path", config_path, str)

        if not os.path.exists(model_path):
            raise RuntimeError("build_from_file failed, model_path does not exist!")
        if model_type is not ModelType.MINDIR:
            raise RuntimeError("build_from_file failed, model_type only support MINDIR!")
        model_type_ = _c_lite_wrapper.ModelType.kMindIR

        # The config file is loaded first; config_dict entries are applied afterwards
        # through update_config.
        if config_path:
            if not os.path.exists(config_path):
                raise RuntimeError("build_from_file failed, config_path does not exist!")
            ret = self._runner.load_config(config_path)
            if not ret.IsOk():
                raise RuntimeError(f"load configuration failed! Error is {ret.ToString()}")

        if config_dict:
            # Validate the complete structure before touching the runner, so a malformed
            # entry never leaves the configuration partially updated.
            check_isinstance("config_dict", config_dict, dict)
            for section, options in config_dict.items():
                check_isinstance("config_dict_key", section, str)
                check_isinstance("config_dict_value", options, dict)
                for option_name, option_value in options.items():
                    check_isinstance("config_dict_value_key", option_name, str)
                    check_isinstance("config_dict_value_value", option_value, str)
            for section, options in config_dict.items():
                ret = self._runner.update_config(section, options)
                if not ret.IsOk():
                    raise RuntimeError(f"update configuration failed! Error is {ret.ToString()}.")

        # pylint: disable=protected-access
        ret = self._runner.build_from_file(model_path, model_type_, context._inner_context)
        if not ret.IsOk():
            raise RuntimeError(f"build_from_file failed! Error is {ret.ToString()}")

    def get_model_executor(self):
        """
        Get ModelExecutors from MultiModelRunner.

        Returns:
            list[ModelExecutor], all Executors in MultiModelRunner.
        """
        return [ModelExecutor(executor_) for executor_ in self._runner.get_model_executor()]





[docs]
class ModelExecutor:
    """
    The `ModelExecutor` class wraps multiple mindspore_lite models and implements their inference scheduling.

    Args:
        executor (_c_lite_wrapper.ModelExecBind, optional): ModelExecutor class wrapped with pybind11.
            Default: ``None``, in which case a new ``_c_lite_wrapper.ModelExecBind`` is created.
    """

    def __init__(self, executor=None):
        self._executor = _c_lite_wrapper.ModelExecBind() if executor is None else executor

    def predict(self, inputs, outputs=None):
        """
        Inference ModelExecutor.

        Args:
            inputs (Union[list, tuple]): All inputs in order. Each element is either a Tensor, or a
                numpy.ndarray whose data is copied into the ModelExecutor input Tensor at the same position.
            outputs (list[Tensor], optional): A list that includes all output Tensors in order,
                this tensor include output data buffer. Default: ``None``.

        Returns:
            list[Tensor], the output Tensor list of the ModelExecutor.

        Raises:
            TypeError: `inputs` is not a list or tuple.
            TypeError: `inputs` is a list or tuple, but the elements are not Tensor or numpy.ndarray.
            TypeError: `outputs` is not a list.
            TypeError: `outputs` is a list, but the elements are not Tensor.
            IndexError: a numpy.ndarray is given at a position for which the ModelExecutor has no input Tensor.
            RuntimeError: predict model failed.
        """
        if not isinstance(inputs, (list, tuple)):
            raise TypeError(f"inputs must be list or tuple, but got {type(inputs)}.")

        model_input_tensors = self.get_inputs()
        inputs_tensor = []
        for i, in_tensor in enumerate(inputs):
            if isinstance(in_tensor, np.ndarray):
                # Fail with an explicit message instead of a bare "list index out of range".
                if i >= len(model_input_tensors):
                    raise IndexError(
                        f"inputs element at index {i} is a numpy array, but the ModelExecutor only has "
                        f"{len(model_input_tensors)} input Tensor(s)."
                    )
                model_input_tensors[i].set_data_from_numpy(in_tensor)
                inputs_tensor.append(model_input_tensors[i])
            elif isinstance(in_tensor, Tensor):
                inputs_tensor.append(in_tensor)
            else:
                raise TypeError("inputs element must be Tensor, or numpy.")

        # Every entry collected above is already a Tensor, so no second type check is needed.
        # pylint: disable=protected-access
        _inputs = [tensor._tensor for tensor in inputs_tensor]

        _outputs = []
        if outputs is not None:
            if not isinstance(outputs, list):
                raise TypeError(f"outputs must be list, but got {type(outputs)}.")
            for i, element in enumerate(outputs):
                if not isinstance(element, Tensor):
                    raise TypeError(f"outputs element must be Tensor, but got {type(element)} at index {i}.")
                _outputs.append(element._tensor)

        predict_result = self._executor.predict(_inputs, _outputs)
        if predict_result is None or len(predict_result) == 0:
            raise RuntimeError("predict failed!")
        return [Tensor(output_tensor) for output_tensor in predict_result]

    def get_inputs(self):
        """
        Obtains all input Tensors of the ModelExecutor.

        Returns:
            list[Tensor], the input Tensor list of the ModelExecutor.
        """
        return [Tensor(_tensor) for _tensor in self._executor.get_inputs()]

    def get_outputs(self):
        """
        Obtains all output information Tensors of the ModelExecutor.

        Returns:
            tuple[TensorMeta], the output TensorMeta tuple of the ModelExecutor.
        """
        outputs_metadata = []
        for _tensor in self._executor.get_outputs():
            out_tensor = Tensor(_tensor)
            # Copy the descriptive fields only; the TensorMeta is filled from the wrapped Tensor.
            output_meta = TensorMeta()
            output_meta.name = out_tensor.name
            output_meta.dtype = out_tensor.dtype
            output_meta.shape = out_tensor.shape
            output_meta.format = out_tensor.format
            output_meta.element_num = out_tensor.element_num
            output_meta.data_size = out_tensor.data_size
            outputs_metadata.append(output_meta)
        return tuple(outputs_metadata)