# Source code for mindspore.ops.operations.rl_ops

# Copyright 2021-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Operators for reinforcement learning."""

from functools import reduce
import mindspore.context as context
from mindspore import _checkparam as validator
from ...common import dtype as mstype
from ..primitive import prim_attr_register, PrimitiveWithInfer


class BufferSample(PrimitiveWithInfer):
    r"""
    In reinforcement learning, sample data randomly from the replay buffer.

    Returns a tuple of tensors whose leading dimension is `batch_size`.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        capacity (int64): Capacity of the buffer, must be a positive integer.
        batch_size (int64): The size of the sampled data, less than or equal to `capacity`.
        buffer_shape (tuple(shape)): The shape of one element of each buffer.
        buffer_dtype (tuple(type)): The dtype of each buffer.
        seed (int64): Random seed for sampling. If the default ``0`` is used, a random
            seed is generated in the kernel; set a non-zero value to fix the seed.
            Default: 0.
        unique (bool): Whether the sampled data is strictly unique. Setting it to
            ``False`` has better performance. Default: False.

    Inputs:
        - **data** (tuple(Parameter(Tensor))) - The tuple(Tensor) represents the replay
          buffer, each tensor is described by `buffer_shape` and `buffer_dtype`.
        - **count** (Parameter) - The real available size of the buffer, data type: int32.
        - **head** (Parameter) - The position of the first data in buffer, data type: int32.

    Outputs:
        tuple(Tensor). The i-th tensor has shape ``(batch_size,) + buffer_shape[i]``
        and dtype ``buffer_dtype[i]``.

    Raises:
        TypeError: If `buffer_shape` is not a tuple.
        ValueError: If `batch_size` is larger than `capacity`.
        ValueError: If `capacity` is not a positive integer.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> capacity = 100
        >>> batch_size = 5
        >>> count = Parameter(Tensor(5, ms.int32), name="count")
        >>> head = Parameter(Tensor(0, ms.int32), name="head")
        >>> shapes = [(4,), (2,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.int32, ms.float32]
        >>> buffer = [Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="states"),
        ...           Parameter(Tensor(np.arange(100 * 2).reshape(100, 2).astype(np.int32)), name="action"),
        ...           Parameter(Tensor(np.ones((100, 1)).astype(np.int32)), name="reward"),
        ...           Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="state_")]
        >>> buffer_sample = ops.BufferSample(capacity, batch_size, shapes, types)
        >>> output = buffer_sample(buffer, count, head)
    """

    @prim_attr_register
    def __init__(self, capacity, batch_size, buffer_shape, buffer_dtype, seed=0, unique=False):
        """Initialize BufferSample."""
        self.init_prim_io_names(inputs=["buffer"], outputs=["sample"])
        validator.check_value_type("shape of init data", buffer_shape, [tuple, list], self.name)
        validator.check_int(capacity, 1, validator.GE, "capacity", self.name)
        self._batch_size = batch_size
        self._buffer_shape = buffer_shape
        self._buffer_dtype = buffer_dtype
        self._n = len(buffer_shape)
        validator.check_int(self._batch_size, capacity, validator.LE, "batchsize", self.name)
        self.add_prim_attr('capacity', capacity)
        self.add_prim_attr('seed', seed)
        self.add_prim_attr('unique', unique)
        # Flattened element count of one row per buffer. The explicit initializer 1
        # makes a scalar shape () yield one element instead of raising
        # "reduce() of empty sequence with no initial value".
        buffer_elements = [reduce(lambda x, y: x * y, shape, 1) for shape in buffer_shape]
        self.add_prim_attr('buffer_elements', buffer_elements)
        self.add_prim_attr('buffer_dtype', buffer_dtype)
        self.add_prim_attr('side_effect_mem', True)
        # No Ascend kernel exists for this op; force the CPU implementation.
        if context.get_context('device_target') == "Ascend":
            self.add_prim_attr('device_target', "CPU")

    def infer_shape(self, data_shape, count_shape, head_shape):
        """Each output is one sampled batch: (batch_size,) prepended to the buffer shape."""
        validator.check_value_type("shape of data", data_shape, [tuple, list], self.name)
        out_shapes = []
        for i in range(self._n):
            out_shapes.append((self._batch_size,) + self._buffer_shape[i])
        return tuple(out_shapes)

    def infer_dtype(self, data_type, count_type, head_type):
        """Outputs keep the configured buffer dtypes; count/head must be int32."""
        validator.check_type_name("count type", count_type, (mstype.int32), self.name)
        validator.check_type_name("head type", head_type, (mstype.int32), self.name)
        return tuple(self._buffer_dtype)
class BufferAppend(PrimitiveWithInfer):
    r"""
    In reinforcement learning, the experience data is collected in each step.
    `BufferAppend` pushes data to the bottom of the buffer under the
    First-In-First-Out rule.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        capacity (int64): Capacity of the buffer, must be a positive integer.
        buffer_shape (tuple(shape)): The shape of one element of each buffer.
        buffer_dtype (tuple(type)): The dtype of each buffer.

    Inputs:
        - **data** (tuple(Parameter(Tensor))) - The tuple(Tensor) represents the replay
          buffer, each tensor is described by `buffer_shape` and `buffer_dtype`.
        - **exp** (tuple(Parameter(Tensor))) - One experience (or one batch of
          experiences), each tensor described by `buffer_shape` and `buffer_dtype`.
        - **count** (Parameter) - The real available size of the buffer, data type: int32.
        - **head** (Parameter) - The position of the first data in buffer, data type: int32.

    Outputs:
        None.

    Raises:
        ValueError: If `count` and `head` is not an integer.
        ValueError: If `capacity` is not a positive integer.
        ValueError: If length of `data` is not equal to length of `exp`.
        ValueError: If dim of data is equal to dim of exp, but `data[1:]` is not equal
            to the shape in `exp`.
        ValueError: If the shape of `data[1:]` is not equal to the shape in `exp`.
        TypeError: If the type in `exp` is not the same with `data`.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> capacity = 100
        >>> count = Parameter(Tensor(5, ms.int32), name="count")
        >>> head = Parameter(Tensor(0, ms.int32), name="head")
        >>> shapes = [(4,), (2,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.int32, ms.float32]
        >>> buffer = [Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="states"),
        ...           Parameter(Tensor(np.arange(100 * 2).reshape(100, 2).astype(np.int32)), name="action"),
        ...           Parameter(Tensor(np.ones((100, 1)).astype(np.int32)), name="reward"),
        ...           Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="state_")]
        >>> exp = [Tensor(np.array([2, 2, 2, 2]), ms.float32), Tensor(np.array([0, 0]), ms.int32),
        ...        Tensor(np.array([0]), ms.int32), Tensor(np.array([3, 3, 3, 3]), ms.float32)]
        >>> buffer_append = ops.BufferAppend(capacity, shapes, types)
        >>> buffer_append(buffer, exp, count, head)
    """

    @prim_attr_register
    def __init__(self, capacity, buffer_shape, buffer_dtype):
        """Initialize BufferAppend."""
        validator.check_int(capacity, 1, validator.GE, "capacity", self.name)
        self.add_prim_attr('capacity', capacity)
        # Flattened element count of one row per buffer. The explicit initializer 1
        # makes a scalar shape () yield one element instead of raising
        # "reduce() of empty sequence with no initial value".
        buffer_elements = [reduce(lambda x, y: x * y, shape, 1) for shape in buffer_shape]
        self.add_prim_attr('buffer_elements', buffer_elements)
        self.add_prim_attr('buffer_dtype', buffer_dtype)
        self.add_prim_attr('side_effect_mem', True)
        # No Ascend kernel exists for this op; force the CPU implementation.
        if context.get_context('device_target') == "Ascend":
            self.add_prim_attr('device_target', "CPU")

    def infer_shape(self, data_shape, exp_shape, count_shape, head_shape):
        """Validate exp against the buffer layout and record the appended batch size."""
        validator.check_equal_int(len(data_shape), len(exp_shape), "exp elements", self.name)
        exp_batch = 1
        if len(data_shape[0]) == len(exp_shape[0]):
            # Batched append: exp has the same rank as data, so its leading
            # dimension is the number of experiences appended at once.
            exp_batch = exp_shape[0][0]
            for i in range(len(data_shape)):
                if len(data_shape[i]) != len(exp_shape[i]):
                    raise ValueError(f"For '{self.name}', the dimension of {i}th 'exp_shape' must be equal to "
                                     f"the dimension of {i}th 'data_shape', but got the {i}th 'exp_shape': "
                                     f"{exp_shape[i]}, the {i}th 'data_shape': {data_shape[i]}.")
                if data_shape[i][0] < exp_shape[i][0]:
                    raise ValueError(f"For '{self.name}', the first dimension of {i}th 'data_shape' must be greater "
                                     f"than or equal to the first dimension of {i}th 'exp_shape', but got the {i}th "
                                     f"'exp_shape': {exp_shape[i]}, the {i}th 'data_shape': {data_shape[i]}.")
        else:
            # Single-experience append: exp must match data minus the leading
            # capacity dimension.
            for i in range(len(data_shape)):
                if data_shape[i][1:] != exp_shape[i]:
                    raise ValueError(f"For '{self.name}', the {i}th 'exp_shape' must be equal to the {i}th "
                                     f"'data_shape' except the first dimension, but got the {i}th 'exp_shape': "
                                     f"{exp_shape[i]}, the {i}th 'data_shape': {data_shape[i]}.")
        self.add_prim_attr('exp_batch', exp_batch)
        return count_shape

    def infer_dtype(self, data_type, exp_type, count_type, head_type):
        """Each exp tensor must have the same dtype as its buffer; count/head are int32."""
        for i in range(len(data_type)):
            if data_type[i] != exp_type[i]:
                raise TypeError(f"For '{self.name}', each tensor in 'exp' must has the same type with 'data', but got "
                                f"'data_type': {data_type}, 'exp_type': {exp_type}.")
        validator.check_type_name("count type", count_type, (mstype.int32), self.name)
        validator.check_type_name("head type", head_type, (mstype.int32), self.name)
        return count_type
class BufferGetItem(PrimitiveWithInfer):
    r"""
    Get the data from the buffer at the position of the input index.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        capacity (int64): Capacity of the buffer, must be a positive integer.
        buffer_shape (tuple(shape)): The shape of one element of each buffer.
        buffer_dtype (tuple(type)): The dtype of each buffer.

    Inputs:
        - **data** (tuple(Parameter(Tensor))) - The tuple(Tensor) represents the replay
          buffer, each tensor is described by `buffer_shape` and `buffer_dtype`.
        - **count** (Parameter) - The real available size of the buffer, data type: int32.
        - **head** (Parameter) - The position of the first data in buffer, data type: int32.
        - **index** (int64) - The position of the data in buffer.

    Outputs:
        tuple(Tensor). The shape is `buffer_shape`. The dtype is `buffer_dtype`.

    Raises:
        ValueError: If `count` and `head` is not an integer.
        ValueError: If `capacity` is not a positive integer.
        TypeError: If `buffer_shape` is not a tuple.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> capacity = 100
        >>> index = 3
        >>> count = Parameter(Tensor(5, ms.int32), name="count")
        >>> head = Parameter(Tensor(0, ms.int32), name="head")
        >>> shapes = [(4,), (2,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.int32, ms.float32]
        >>> buffer = [Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="states"),
        ...           Parameter(Tensor(np.arange(100 * 2).reshape(100, 2).astype(np.int32)), name="action"),
        ...           Parameter(Tensor(np.ones((100, 1)).astype(np.int32)), name="reward"),
        ...           Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="state_")]
        >>> buffer_get = ops.BufferGetItem(capacity, shapes, types)
        >>> output = buffer_get(buffer, count, head, index)
    """

    @prim_attr_register
    def __init__(self, capacity, buffer_shape, buffer_dtype):
        """Initialize BufferGetItem."""
        self.init_prim_io_names(inputs=["buffer"], outputs=["item"])
        validator.check_int(capacity, 1, validator.GE, "capacity", self.name)
        self._buffer_shape = buffer_shape
        self._buffer_dtype = buffer_dtype
        self._n = len(buffer_shape)
        # Flattened element count of one row per buffer. The explicit initializer 1
        # makes a scalar shape () yield one element instead of raising
        # "reduce() of empty sequence with no initial value".
        buffer_elements = [reduce(lambda x, y: x * y, shape, 1) for shape in buffer_shape]
        self.add_prim_attr('buffer_elements', buffer_elements)
        self.add_prim_attr('buffer_dtype', buffer_dtype)
        self.add_prim_attr('capacity', capacity)
        self.add_prim_attr('side_effect_mem', True)
        # No Ascend kernel exists for this op; force the CPU implementation.
        if context.get_context('device_target') == "Ascend":
            self.add_prim_attr('device_target', "CPU")

    def infer_shape(self, data_shape, count_shape, head_shape, index_shape):
        """One item is returned per buffer, so each output keeps its buffer shape."""
        validator.check_value_type("shape of data", data_shape, [tuple, list], self.name)
        return tuple(self._buffer_shape)

    def infer_dtype(self, data_type, count_type, head_type, index_type):
        """Outputs keep the configured buffer dtypes; count/head int32, index int32/int64."""
        validator.check_type_name("count type", count_type, (mstype.int32), self.name)
        validator.check_type_name("head type", head_type, (mstype.int32), self.name)
        validator.check_type_name("index type", index_type, (mstype.int64, mstype.int32), self.name)
        return tuple(self._buffer_dtype)