Source code for mindspore_rl.core.uniform_replay_buffer
# Copyright 2021-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Implementation of Replay Buffer class.
"""
import mindspore as ms
import numpy as np
from mindspore import Tensor, context, get_seed, nn
from mindspore.common.parameter import Parameter, ParameterTuple
from mindspore.ops import operations as P
def _create_tensor(capacity, shapes, types):
    """
    Allocate the zero-initialised storage parameters of a replay buffer.

    One parameter is created per entry of ``shapes``. Its shape is the entry's
    shape prefixed with a leading dimension of size ``capacity``.

    Args:
        capacity (int): capacity of the buffer, used as the leading dimension.
        shapes (list[tuple[int]]): shape of each tensor in a buffer element. Each
            entry is concatenated onto ``(capacity,)``, so it has to be a tuple.
        types (list[mindspore.dtype]): data type of each tensor, indexed in step
            with ``shapes``.

    Returns:
        buffer (list[Parameter]), one parameter per entry of ``shapes``, named
        ``buffer_0``, ``buffer_1``, ... and created with ``requires_grad=False``.
    """
    return [
        Parameter(
            Tensor(np.zeros((capacity,) + item_shape), types[idx]),
            name=f"buffer_{idx}",
            requires_grad=False,
        )
        for idx, item_shape in enumerate(shapes)
    ]
class UniformReplayBuffer(nn.Cell):
    """
    The replay buffer class.

    The replay buffer stores the experience collected from the environment. Each
    element of the buffer is a list of tensors, so the constructor takes the shape
    and the data type of every tensor in an element.

    Args:
        sample_size (int): size for sampling from the buffer.
        capacity (int): the capacity of the buffer.
        shapes (list[tuple[int]]): the shape of each tensor in a buffer element.
        types (list[mindspore.dtype]): the data type of each tensor in a buffer element.

    Examples:
        >>> import mindspore as ms
        >>> from mindspore_rl.core.uniform_replay_buffer import UniformReplayBuffer
        >>> sample_size = 10
        >>> capacity = 10000
        >>> shapes = [(4,), (1,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.float32, ms.float32]
        >>> replaybuffer = UniformReplayBuffer(sample_size, capacity, shapes, types)
        >>> print(replaybuffer)
        UniformReplayBuffer<>
    """

    def __init__(self, sample_size, capacity, shapes, types):
        super().__init__()

        # Storage and bookkeeping state. `count` and `head` are handed to the
        # buffer primitives together with the storage on every call.
        self.buffer = ParameterTuple(_create_tensor(capacity, shapes, types))
        self._capacity = capacity
        self.count = Parameter(Tensor(0, ms.int32), name="count", requires_grad=False)
        self.head = Parameter(Tensor(0, ms.int32), name="head", requires_grad=False)
        self.zero = Tensor(0, ms.int32)

        # Primitives implementing append / indexed read / sampling.
        self.buffer_append = P.BufferAppend(self._capacity, shapes, types)
        self.buffer_get = P.BufferGetItem(self._capacity, shapes, types)
        # Fall back to seed 0 when get_seed() reports no seed (None).
        global_seed = get_seed()
        sample_seed = 0 if global_seed is None else global_seed
        self.buffer_sample = P.BufferSample(
            self._capacity, sample_size, shapes, types, sample_seed
        )

        # On an Ascend device target the three buffer primitives are pinned to CPU.
        if context.get_context("device_target") == "Ascend":
            for buffer_op in (self.buffer_append, self.buffer_get, self.buffer_sample):
                buffer_op.add_prim_attr("primitive_target", "CPU")

        # Helper operators and constants used by reset() and full().
        self.reshape = P.Reshape()
        self.assign = P.Assign()
        self.greater_equal = P.GreaterEqual()
        self.capacity_tensor = Tensor([capacity], ms.int32)

    def insert(self, exp):
        """
        Insert an element to the buffer. If the buffer is full, FIFO strategy will
        be used to replace the element in the buffer.

        Args:
            exp (list[Tensor]): a list of tensors matching the shapes and types the
                buffer was initialized with.

        Returns:
            element (list[Tensor]), the whole buffer after insertion.
        """
        self.buffer_append(self.buffer, exp, self.count, self.head)
        return self.buffer

    def get_item(self, index):
        """
        Get the element stored at a specific position of the replay buffer.

        Args:
            index (int): the location of the item.

        Returns:
            element (List[Tensor]), the element from the buffer.
        """
        element = self.buffer_get(self.buffer, self.count, self.head, index)
        return element

    def sample(self):
        """
        Sample the replay buffer, i.e. randomly choose a set of elements and
        output them.

        Returns:
            data (Tuple(Tensor)), a set of sampled elements from the buffer.
        """
        sampled = self.buffer_sample(self.buffer, self.count, self.head)
        return sampled

    def reset(self):
        """
        Reset the replay buffer by assigning zero to ``self.count``.

        Returns:
            success, the value returned by the assign operation.
        """
        # NOTE(review): only `count` is reset here; `head` keeps its current
        # value -- confirm this is what the buffer primitives expect.
        return self.assign(self.count, self.zero)

    def size(self):
        """
        Return the size of the replay buffer.

        Returns:
            size, the number of elements in the replay buffer, obtained by calling
            ``asnumpy()`` on the ``count`` parameter.
        """
        current_count = self.count.asnumpy()
        return current_count

    def full(self):
        """
        Check if the replay buffer is full or not.

        Returns:
            Full, the result of ``count >= capacity`` with both operands reshaped
            to ``(1,)``: ``True`` if the replay buffer is full, ``False`` otherwise.
        """
        return self.greater_equal(
            self.reshape(self.count, (1,)),
            self.reshape(self.capacity_tensor, (1,)),
        )