# Source code for mindspore_rl.core.uniform_replay_buffer

# Copyright 2021-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Implementation of Replay Buffer class.
"""

import mindspore as ms
import numpy as np
from mindspore import Tensor, context, get_seed, nn
from mindspore.common.parameter import Parameter, ParameterTuple
from mindspore.ops import operations as P


def _create_tensor(capacity, shapes, types):
    """
    Create the zero-initialised storage tensors of a replay buffer.

    One ``Parameter`` is created per entry of ``shapes``. Each one wraps a
    zero-filled tensor whose leading dimension is ``capacity`` and whose
    remaining dimensions are the corresponding element shape.

    Args:
        capacity (int): capacity of the buffer, used as the leading dimension
            of every storage tensor.
        shapes (list[tuple[int]]): the shape of each tensor in a buffer
            element. Each shape may be any sequence of ints (tuple or list).
        types (list[mindspore.dtype]): the data type of each tensor in a
            buffer element, indexed in step with ``shapes``.

    Returns:
        buffer (list[Parameter]), one parameter per element tensor, created
        with ``requires_grad=False`` and named ``buffer_0``, ``buffer_1``, ...
    """
    buffer = []
    for i, shape in enumerate(shapes):
        # Normalise to a tuple so that a shape given as a list can still be
        # concatenated with the leading capacity dimension.
        storage_shape = (capacity,) + tuple(shape)
        buffer.append(
            Parameter(
                Tensor(np.zeros(storage_shape), types[i]),
                name=f"buffer_{i}",
                requires_grad=False,
            )
        )
    return buffer


class UniformReplayBuffer(nn.Cell):
    """
    The replay buffer class. The replay buffer will store the experience from
    environment. In replay buffer, each element is a list of tensors.
    Therefore, the constructor of the UniformReplayBuffer class takes the
    shape and type of each tensor as an argument.

    Args:
        sample_size (int): size for sampling from the buffer.
        capacity (int): the capacity of the buffer.
        shapes (list[tuple[int]]): the shape of each tensor in a buffer
            element.
        types (list[mindspore.dtype]): the data type of each tensor in a
            buffer element.

    Examples:
        >>> import mindspore as ms
        >>> from mindspore_rl.core.uniform_replay_buffer import UniformReplayBuffer
        >>> sample_size = 10
        >>> capacity = 10000
        >>> shapes = [(4,), (1,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.float32, ms.float32]
        >>> replaybuffer = UniformReplayBuffer(sample_size, capacity, shapes, types)
        >>> print(replaybuffer)
        UniformReplayBuffer<>
    """

    def __init__(self, sample_size, capacity, shapes, types):
        nn.Cell.__init__(self)
        # Storage: one parameter per element tensor, each with a leading
        # dimension of ``capacity``.
        self.buffer = ParameterTuple(_create_tensor(capacity, shapes, types))
        self._capacity = capacity
        # ``count`` and ``head`` are the bookkeeping scalars handed to the
        # buffer primitives on every call; both start at zero.
        self.count = Parameter(Tensor(0, ms.int32), name="count", requires_grad=False)
        self.head = Parameter(Tensor(0, ms.int32), name="head", requires_grad=False)
        self.zero = Tensor(0, ms.int32)
        self.buffer_append = P.BufferAppend(self._capacity, shapes, types)
        self.buffer_get = P.BufferGetItem(self._capacity, shapes, types)
        # Fall back to seed 0 when no global seed has been set.
        seed = get_seed()
        if seed is None:
            seed = 0
        self.buffer_sample = P.BufferSample(
            self._capacity, sample_size, shapes, types, seed
        )
        # On Ascend, tag the buffer primitives with primitive_target="CPU".
        if context.get_context("device_target") in ["Ascend"]:
            self.buffer_append.add_prim_attr("primitive_target", "CPU")
            self.buffer_get.add_prim_attr("primitive_target", "CPU")
            self.buffer_sample.add_prim_attr("primitive_target", "CPU")
        self.reshape = P.Reshape()
        self.assign = P.Assign()
        self.greater_equal = P.GreaterEqual()
        self.capacity_tensor = Tensor([capacity], ms.int32)

    def insert(self, exp):
        """
        Insert an element to the buffer. If the buffer is full, FIFO strategy
        will be used to replace the element in the buffer.

        Args:
            exp (list[Tensor]): insert a list of tensor which matches with the
                initialized shape and type into the buffer.

        Returns:
            element (list[Tensor]), return the whole buffer after insertion
        """
        self.buffer_append(self.buffer, exp, self.count, self.head)
        return self.buffer

    def get_item(self, index):
        """
        Get an element from the replaybuffer in specific position(index).

        Args:
            index (int): the location of the item.

        Returns:
            element (List[Tensor]), the element from the buffer.
        """
        return self.buffer_get(self.buffer, self.count, self.head, index)

    def sample(self):
        """
        Sampling the replaybuffer, which means that it will randomly choose a
        set of element and output them.

        Returns:
            data (Tuple(Tensor)), A set of sampled elements from the buffer.
        """
        return self.buffer_sample(self.buffer, self.count, self.head)

    def reset(self):
        """
        Reset the replaybuffer. It changes the value of self.count to zero.

        Returns:
            success, the result of assigning zero to ``self.count``.
        """
        # NOTE(review): only ``count`` is reset here; ``head`` keeps its
        # previous value. Confirm the append primitive copes with that.
        success = self.assign(self.count, self.zero)
        return success

    def size(self):
        """
        Return the size of the replaybuffer.

        Returns:
            size, the number of element in the replaybuffer, i.e. the value
            of ``self.count`` converted with ``asnumpy()``.
        """
        return self.count.asnumpy()

    def full(self):
        """
        Check if the replaybuffer is full or not.

        Returns:
            Full (Tensor), a tensor of shape (1,) that is ``True`` if the
            element count is greater than or equal to the capacity,
            ``False`` otherwise.
        """
        count = self.reshape(self.count, (1,))
        capacity = self.reshape(self.capacity_tensor, (1,))
        return self.greater_equal(count, capacity)