# Source code for mindspore.ops.operations.rl_ops

# Copyright 2021-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Operators for reinforcement learning."""

from functools import reduce
import mindspore.context as context
from mindspore import _checkparam as validator
from ...common import dtype as mstype
from ..primitive import prim_attr_register, PrimitiveWithInfer


class BufferSample(PrimitiveWithInfer):
    r"""
    In reinforcement learning, sample data randomly from the replay buffer.

    Returns a tuple of tensors whose leading dimension is `batch_size`.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        capacity (int64): Capacity of the buffer, must be a positive integer.
        batch_size (int64): The size of the sampled data, less than or equal to `capacity`.
        buffer_shape (tuple(shape)): The shape of one element of each buffer.
        buffer_dtype (tuple(type)): The dtype of each buffer.
        seed (int64): Random seed for sampling. If the default ``0`` is used, a random
            seed is generated in the kernel; set a non-zero value to fix the seed.
            Default: 0.
        unique (bool): Whether the sampled data is strictly unique. Setting it to
            ``False`` has better performance. Default: False.

    Inputs:
        - **data** (tuple(Parameter(Tensor))) - The tuple(Tensor) represents the replay
          buffer, each tensor is described by `buffer_shape` and `buffer_dtype`.
        - **count** (Parameter) - The real available size of the buffer, data type: int32.
        - **head** (Parameter) - The position of the first data in buffer, data type: int32.

    Outputs:
        tuple(Tensor). The i-th tensor has shape ``(batch_size,) + buffer_shape[i]``
        and dtype ``buffer_dtype[i]``.

    Raises:
        TypeError: If `buffer_shape` is not a tuple.
        ValueError: If `batch_size` is larger than `capacity`.
        ValueError: If `capacity` is not a positive integer.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> capacity = 100
        >>> batch_size = 5
        >>> count = Parameter(Tensor(5, ms.int32), name="count")
        >>> head = Parameter(Tensor(0, ms.int32), name="head")
        >>> shapes = [(4,), (2,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.int32, ms.float32]
        >>> buffer = [Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="states"),
        ...           Parameter(Tensor(np.arange(100 * 2).reshape(100, 2).astype(np.int32)), name="action"),
        ...           Parameter(Tensor(np.ones((100, 1)).astype(np.int32)), name="reward"),
        ...           Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="state_")]
        >>> buffer_sample = ops.BufferSample(capacity, batch_size, shapes, types)
        >>> output = buffer_sample(buffer, count, head)
    """

    @prim_attr_register
    def __init__(self, capacity, batch_size, buffer_shape, buffer_dtype, seed=0, unique=False):
        """Initialize BufferSample."""
        self.init_prim_io_names(inputs=["buffer"], outputs=["sample"])
        validator.check_value_type("shape of init data", buffer_shape, [tuple, list], self.name)
        validator.check_int(capacity, 1, validator.GE, "capacity", self.name)
        self._batch_size = batch_size
        self._buffer_shape = buffer_shape
        self._buffer_dtype = buffer_dtype
        self._n = len(buffer_shape)
        validator.check_int(self._batch_size, capacity, validator.LE, "batchsize", self.name)
        self.add_prim_attr('capacity', capacity)
        self.add_prim_attr('seed', seed)
        self.add_prim_attr('unique', unique)
        # Flattened element count of one row per buffer. The explicit initializer 1
        # makes a scalar shape () yield one element instead of raising
        # "reduce() of empty sequence with no initial value".
        buffer_elements = [reduce(lambda x, y: x * y, shape, 1) for shape in buffer_shape]
        self.add_prim_attr('buffer_elements', buffer_elements)
        self.add_prim_attr('buffer_dtype', buffer_dtype)
        self.add_prim_attr('side_effect_mem', True)
        # No Ascend kernel exists for this op; force the CPU implementation.
        if context.get_context('device_target') == "Ascend":
            self.add_prim_attr('device_target', "CPU")

    def infer_shape(self, data_shape, count_shape, head_shape):
        """Each output is one sampled batch: (batch_size,) prepended to the buffer shape."""
        validator.check_value_type("shape of data", data_shape, [tuple, list], self.name)
        out_shapes = []
        for i in range(self._n):
            out_shapes.append((self._batch_size,) + self._buffer_shape[i])
        return tuple(out_shapes)

    def infer_dtype(self, data_type, count_type, head_type):
        """Outputs keep the configured buffer dtypes; count/head must be int32."""
        validator.check_type_name("count type", count_type, (mstype.int32), self.name)
        validator.check_type_name("head type", head_type, (mstype.int32), self.name)
        return tuple(self._buffer_dtype)
class BufferAppend(PrimitiveWithInfer):
    r"""
    In reinforcement learning, the experience data is collected in each step.
    `BufferAppend` pushes data to the bottom of the buffer under the
    First-In-First-Out rule.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        capacity (int64): Capacity of the buffer, must be a positive integer.
        buffer_shape (tuple(shape)): The shape of one element of each buffer.
        buffer_dtype (tuple(type)): The dtype of each buffer.

    Inputs:
        - **data** (tuple(Parameter(Tensor))) - The tuple(Tensor) represents the replay
          buffer, each tensor is described by `buffer_shape` and `buffer_dtype`.
        - **exp** (tuple(Parameter(Tensor))) - One experience (or one batch of
          experiences), each tensor described by `buffer_shape` and `buffer_dtype`.
        - **count** (Parameter) - The real available size of the buffer, data type: int32.
        - **head** (Parameter) - The position of the first data in buffer, data type: int32.

    Outputs:
        None.

    Raises:
        ValueError: If `count` and `head` is not an integer.
        ValueError: If `capacity` is not a positive integer.
        ValueError: If length of `data` is not equal to length of `exp`.
        ValueError: If dim of data is equal to dim of exp, but `data[1:]` is not equal
            to the shape in `exp`.
        ValueError: If the shape of `data[1:]` is not equal to the shape in `exp`.
        TypeError: If the type in `exp` is not the same with `data`.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> capacity = 100
        >>> count = Parameter(Tensor(5, ms.int32), name="count")
        >>> head = Parameter(Tensor(0, ms.int32), name="head")
        >>> shapes = [(4,), (2,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.int32, ms.float32]
        >>> buffer = [Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="states"),
        ...           Parameter(Tensor(np.arange(100 * 2).reshape(100, 2).astype(np.int32)), name="action"),
        ...           Parameter(Tensor(np.ones((100, 1)).astype(np.int32)), name="reward"),
        ...           Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="state_")]
        >>> exp = [Tensor(np.array([2, 2, 2, 2]), ms.float32), Tensor(np.array([0, 0]), ms.int32),
        ...        Tensor(np.array([0]), ms.int32), Tensor(np.array([3, 3, 3, 3]), ms.float32)]
        >>> buffer_append = ops.BufferAppend(capacity, shapes, types)
        >>> buffer_append(buffer, exp, count, head)
    """

    @prim_attr_register
    def __init__(self, capacity, buffer_shape, buffer_dtype):
        """Initialize BufferAppend."""
        validator.check_int(capacity, 1, validator.GE, "capacity", self.name)
        self.add_prim_attr('capacity', capacity)
        # Flattened element count of one row per buffer. The explicit initializer 1
        # makes a scalar shape () yield one element instead of raising
        # "reduce() of empty sequence with no initial value".
        buffer_elements = [reduce(lambda x, y: x * y, shape, 1) for shape in buffer_shape]
        self.add_prim_attr('buffer_elements', buffer_elements)
        self.add_prim_attr('buffer_dtype', buffer_dtype)
        self.add_prim_attr('side_effect_mem', True)
        # No Ascend kernel exists for this op; force the CPU implementation.
        if context.get_context('device_target') == "Ascend":
            self.add_prim_attr('device_target', "CPU")

    def infer_shape(self, data_shape, exp_shape, count_shape, head_shape):
        """Validate exp against the buffer layout and record the appended batch size."""
        validator.check_equal_int(len(data_shape), len(exp_shape), "exp elements", self.name)
        exp_batch = 1
        if len(data_shape[0]) == len(exp_shape[0]):
            # Batched append: exp has the same rank as data, so its leading
            # dimension is the number of experiences appended at once.
            exp_batch = exp_shape[0][0]
            for i in range(len(data_shape)):
                if len(data_shape[i]) != len(exp_shape[i]):
                    raise ValueError(f"For '{self.name}', the dimension of {i}th 'exp_shape' must be equal to "
                                     f"the dimension of {i}th 'data_shape', but got the {i}th 'exp_shape': "
                                     f"{exp_shape[i]}, the {i}th 'data_shape': {data_shape[i]}.")
                if data_shape[i][0] < exp_shape[i][0]:
                    raise ValueError(f"For '{self.name}', the first dimension of {i}th 'data_shape' must be greater "
                                     f"than or equal to the first dimension of {i}th 'exp_shape', but got the {i}th "
                                     f"'exp_shape': {exp_shape[i]}, the {i}th 'data_shape': {data_shape[i]}.")
        else:
            # Single-experience append: exp must match data minus the leading
            # capacity dimension.
            for i in range(len(data_shape)):
                if data_shape[i][1:] != exp_shape[i]:
                    raise ValueError(f"For '{self.name}', the {i}th 'exp_shape' must be equal to the {i}th "
                                     f"'data_shape' except the first dimension, but got the {i}th 'exp_shape': "
                                     f"{exp_shape[i]}, the {i}th 'data_shape': {data_shape[i]}.")
        self.add_prim_attr('exp_batch', exp_batch)
        return count_shape

    def infer_dtype(self, data_type, exp_type, count_type, head_type):
        """Each exp tensor must have the same dtype as its buffer; count/head are int32."""
        for i in range(len(data_type)):
            if data_type[i] != exp_type[i]:
                raise TypeError(f"For '{self.name}', each tensor in 'exp' must has the same type with 'data', but got "
                                f"'data_type': {data_type}, 'exp_type': {exp_type}.")
        validator.check_type_name("count type", count_type, (mstype.int32), self.name)
        validator.check_type_name("head type", head_type, (mstype.int32), self.name)
        return count_type
class BufferGetItem(PrimitiveWithInfer):
    r"""
    Get the data from the buffer at the position of the input index.

    .. warning::
        This is an experimental API that is subject to change or deletion.

    Args:
        capacity (int64): Capacity of the buffer, must be a positive integer.
        buffer_shape (tuple(shape)): The shape of one element of each buffer.
        buffer_dtype (tuple(type)): The dtype of each buffer.

    Inputs:
        - **data** (tuple(Parameter(Tensor))) - The tuple(Tensor) represents the replay
          buffer, each tensor is described by `buffer_shape` and `buffer_dtype`.
        - **count** (Parameter) - The real available size of the buffer, data type: int32.
        - **head** (Parameter) - The position of the first data in buffer, data type: int32.
        - **index** (int64) - The position of the data in buffer.

    Outputs:
        tuple(Tensor). The shape is `buffer_shape`. The dtype is `buffer_dtype`.

    Raises:
        ValueError: If `count` and `head` is not an integer.
        ValueError: If `capacity` is not a positive integer.
        TypeError: If `buffer_shape` is not a tuple.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> capacity = 100
        >>> index = 3
        >>> count = Parameter(Tensor(5, ms.int32), name="count")
        >>> head = Parameter(Tensor(0, ms.int32), name="head")
        >>> shapes = [(4,), (2,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.int32, ms.float32]
        >>> buffer = [Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="states"),
        ...           Parameter(Tensor(np.arange(100 * 2).reshape(100, 2).astype(np.int32)), name="action"),
        ...           Parameter(Tensor(np.ones((100, 1)).astype(np.int32)), name="reward"),
        ...           Parameter(Tensor(np.arange(100 * 4).reshape(100, 4).astype(np.float32)), name="state_")]
        >>> buffer_get = ops.BufferGetItem(capacity, shapes, types)
        >>> output = buffer_get(buffer, count, head, index)
    """

    @prim_attr_register
    def __init__(self, capacity, buffer_shape, buffer_dtype):
        """Initialize BufferGetItem."""
        self.init_prim_io_names(inputs=["buffer"], outputs=["item"])
        validator.check_int(capacity, 1, validator.GE, "capacity", self.name)
        self._buffer_shape = buffer_shape
        self._buffer_dtype = buffer_dtype
        self._n = len(buffer_shape)
        # Flattened element count of one row per buffer. The explicit initializer 1
        # makes a scalar shape () yield one element instead of raising
        # "reduce() of empty sequence with no initial value".
        buffer_elements = [reduce(lambda x, y: x * y, shape, 1) for shape in buffer_shape]
        self.add_prim_attr('buffer_elements', buffer_elements)
        self.add_prim_attr('buffer_dtype', buffer_dtype)
        self.add_prim_attr('capacity', capacity)
        self.add_prim_attr('side_effect_mem', True)
        # No Ascend kernel exists for this op; force the CPU implementation.
        if context.get_context('device_target') == "Ascend":
            self.add_prim_attr('device_target', "CPU")

    def infer_shape(self, data_shape, count_shape, head_shape, index_shape):
        """One item is returned per buffer, so each output keeps its buffer shape."""
        validator.check_value_type("shape of data", data_shape, [tuple, list], self.name)
        return tuple(self._buffer_shape)

    def infer_dtype(self, data_type, count_type, head_type, index_type):
        """Outputs keep the configured buffer dtypes; count/head int32, index int32/int64."""
        validator.check_type_name("count type", count_type, (mstype.int32), self.name)
        validator.check_type_name("head type", head_type, (mstype.int32), self.name)
        validator.check_type_name("index type", index_type, (mstype.int64, mstype.int32), self.name)
        return tuple(self._buffer_dtype)