[{"data":1,"prerenderedAt":1732},["ShallowReactive",2],{"content-query-tNjXKvs5ok":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":1726,"_id":1727,"_source":1728,"_file":1729,"_stem":1730,"_extension":1731},"/technology-blogs/zh/3198","zh",false,"","深度学习经典算法详细介绍","DeepLearning","2024-06-13","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/11/28/6b32fcae144241df9e6cbdaec962c247.png","technology-blogs","基础知识",{"type":15,"children":16,"toc":1706},"root",[17,24,29,41,46,51,56,61,66,86,91,119,123,128,133,141,145,150,154,159,164,169,176,181,204,209,212,216,221,229,234,239,244,249,254,259,264,269,277,282,287,294,304,309,326,330,335,343,346,350,355,363,368,375,380,385,392,400,405,412,417,430,435,443,448,455,463,468,473,478,485,511,518,526,534,539,547,552,557,562,567,579,597,602,610,615,628,633,640,648,653,665,670,675,686,689,694,699,707,712,717,735,740,748,755,763,768,776,781,788,793,798,803,808,827,835,840,847,852,860,868,873,880,888,893,900,908,913,920,930,935,940,945,956,959,964,972,977,984,992,1004,1009,1017,1022,1029,1037,1042,1049,1057,1062,1069,1077,1082,1100,1105,1110,1133,1138,1145,1153,1158,1165,1173,1181,1186,1193,1201,1206,1211,1218,1226,1231,1242,1247,1250,1255,1263,1268,1275,1280,1287,1295,1300,1307,1312,1320,1325,1330,1335,1342,1350,1354,1359,1364,1369,1374,1381,1386,1393,1401,1406,1411,1415,1420,1425,1432,1437,1442,1450,1455,1462,1470,1475,1480,1488,1493,1501,1506,1513,1518,1525,1532,1544,1552,1557,1564,1569,1577,1585,1597,1602,1609,1617,1622,1630,1649,1662,1670,1675,1680,1685,1693,1698],{"type":18,"tag":19,"props":20,"children":21},"element","h1",{"id":8},[22],{"type":23,"value":8},"text",{"type":18,"tag":19,"props":25,"children":27},{"id":26},"deeplearning",[28],{"type":23,"value":9},{"type":18,"tag":30,"props":31,"children":32},"p",{},[33,35],{"type":23,"value":34},"2024年4月1日",{"type":18,"tag":36,"props":37,"children":38},"strong",{},[
39],{"type":23,"value":40},"更新",{"type":18,"tag":30,"props":42,"children":43},{},[44],{"type":23,"value":45},"在此教程中，我们将对深度学习有一个基本的认识，并介绍几种常用的模型及算法，并对几个经典的模型及算法进行简单的代码实现。",{"type":18,"tag":19,"props":47,"children":49},{"id":48},"目录",[50],{"type":23,"value":48},{"type":18,"tag":30,"props":52,"children":53},{},[54],{"type":23,"value":55},"环境搭建",{"type":18,"tag":30,"props":57,"children":58},{},[59],{"type":23,"value":60},"学习路径",{"type":18,"tag":30,"props":62,"children":63},{},[64],{"type":23,"value":65},"基本介绍",{"type":18,"tag":67,"props":68,"children":69},"ul",{},[70,76,81],{"type":18,"tag":71,"props":72,"children":73},"li",{},[74],{"type":23,"value":75},"何为深度学习",{"type":18,"tag":71,"props":77,"children":78},{},[79],{"type":23,"value":80},"神经网络",{"type":18,"tag":71,"props":82,"children":83},{},[84],{"type":23,"value":85},"项目结构",{"type":18,"tag":30,"props":87,"children":88},{},[89],{"type":23,"value":90},"常用模型及算法",{"type":18,"tag":67,"props":92,"children":93},{},[94,99,104,109,114],{"type":18,"tag":71,"props":95,"children":96},{},[97],{"type":23,"value":98},"多层感知机(MLP)",{"type":18,"tag":71,"props":100,"children":101},{},[102],{"type":23,"value":103},"卷积神经网络(CNN)",{"type":18,"tag":71,"props":105,"children":106},{},[107],{"type":23,"value":108},"循环神经网络(RNN)",{"type":18,"tag":71,"props":110,"children":111},{},[112],{"type":23,"value":113},"LSTM长短期记忆神经网络",{"type":18,"tag":71,"props":115,"children":116},{},[117],{"type":23,"value":118},"强化学习(RL)",{"type":18,"tag":120,"props":121,"children":122},"hr",{},[],{"type":18,"tag":124,"props":125,"children":126},"h2",{"id":55},[127],{"type":23,"value":55},{"type":18,"tag":30,"props":129,"children":130},{},[131],{"type":23,"value":132},"在正式开始学习深度学习之前，我们需要先搭建MindSpore环境。首先选择想要安装的MindSpore版本，并执行命令：set MS_VERSION=版本号；接着根据系统环境信息在官网获取安装命令，如下图所示是作者选择的安装命令；最后执行命令：python -c \"import mindspore;mindspore.set_context(device_target='CPU');mindspore.run_check()\"以验证是否成功安装，如果看到命令行中输出MindSpore version: 
版本号，则说明安装成功，如下图所示。",{"type":18,"tag":30,"props":134,"children":135},{},[136],{"type":18,"tag":137,"props":138,"children":140},"img",{"alt":7,"src":139},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/980c504ce44240a8a437400bdf01a5be.png",[],{"type":18,"tag":124,"props":142,"children":143},{"id":60},[144],{"type":23,"value":60},{"type":18,"tag":30,"props":146,"children":147},{},[148],{"type":23,"value":149},"对深度学习有了一定的了解之后，我们会知道深度学习离不开神经网络，可以从多层感知器(Multilayer Perceptron，简称 MLP)神经网络入手。 MLP是最基本的神经网络模型之一，它的结构比较简单，涉及的很多算法是我们学习更复杂的模型的基础，易于理解和实现，同时又有很好的可扩展性和通用性，可以应用于分类、回归等多种任务。学习 MLP 之后，你可以进一步学习卷积神经网络(Convolutional Neural Networks，简称 CNN)和循环神经网络(Recurrent Neural Networks，简称 RNN)等等，它们分别用于计算机视觉和自然语言处理等特定领域的问题。最后我们了解强化学习，它适用于序贯决策问题(涉及一系列有序的决策问题)。学习完各个算法的原理之后，我们可以进行简单的代码实现。",{"type":18,"tag":124,"props":151,"children":152},{"id":65},[153],{"type":23,"value":65},{"type":18,"tag":155,"props":156,"children":157},"h3",{"id":75},[158],{"type":23,"value":75},{"type":18,"tag":30,"props":160,"children":161},{},[162],{"type":23,"value":163},"从定义上说，深度学习是一种机器学习方法，它通过模拟人类大脑的工作原理来处理和分析大量数据，其核心是神经网络，它由多个层次的神经元组成，每一层神经元都负责处理不同的特征。深度学习通过多层次的神经网络来提取数据的特征，并利用这些特征进行分类、预测和其他任务。",{"type":18,"tag":30,"props":165,"children":166},{},[167],{"type":23,"value":168},"下图展示了人工智能、机器学习、深度学习之间的关系：",{"type":18,"tag":30,"props":170,"children":171},{},[172],{"type":18,"tag":137,"props":173,"children":175},{"alt":7,"src":174},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/5b5ba82360604b05a8f9acf3290dd1ac.png",[],{"type":18,"tag":30,"props":177,"children":178},{},[179],{"type":23,"value":180},"深度学习可被定义为以下四个基本网络框架中具有大量参数和层数的神经网络：",{"type":18,"tag":67,"props":182,"children":183},{},[184,189,194,199],{"type":18,"tag":71,"props":185,"children":186},{},[187],{"type":23,"value":188},"无监督预训练网络",{"type":18,"tag":71,"props":190,"children":191},{},[192],{"type":23,"value":193},"卷积神经网络",{"type":18,"tag":71,"props":195,"children":196},{},[197],{"type":23,"value":198},
"循环神经网络",{"type":18,"tag":71,"props":200,"children":201},{},[202],{"type":23,"value":203},"递归神经网络",{"type":18,"tag":30,"props":205,"children":206},{},[207],{"type":23,"value":208},"可见深度学习与神经网络是密不可分的，因此我们接下来要介绍的就是神经网络。",{"type":18,"tag":120,"props":210,"children":211},{},[],{"type":18,"tag":155,"props":213,"children":214},{"id":80},[215],{"type":23,"value":80},{"type":18,"tag":30,"props":217,"children":218},{},[219],{"type":23,"value":220},"神经网络的学习方法和算法是深度学习的重要组成部分，神经网络在很多领域都有广泛的应用，例如计算机视觉、自然语言处理、语音识别等。",{"type":18,"tag":30,"props":222,"children":223},{},[224],{"type":18,"tag":36,"props":225,"children":226},{},[227],{"type":23,"value":228},"神经元",{"type":18,"tag":30,"props":230,"children":231},{},[232],{"type":23,"value":233},"神经网络由大量的神经元相互连接而成。每个神经元接受线性组合的输入后，最开始只是简单的线性加权，后来给每个神经元加上了非线性的激活函数，从而进行非线性变换后输出。每两个神经元之间的连接代表加权值，称之为权重(weight)。不同的权重和激活函数，则会导致神经网络不同的输出。",{"type":18,"tag":30,"props":235,"children":236},{},[237],{"type":23,"value":238},"把单个神经元组织在一起，便形成了神经网络。神经网络的每一可能由单个或多个神经元组成，每一层的输出将会成为下一层的输入数据。下面是一些常见的神经网络类型：",{"type":18,"tag":30,"props":240,"children":241},{},[242],{"type":23,"value":243},"1.前馈神经网络(Feedforward Neural Network)：前馈神经网络是最基本的神经网络类型，也是深度学习中最常见的神经网络类型。它由若干个神经元按照一定的层次结构组成，每个神经元接收上一层的输出，产生本层的输出，从而实现信息的传递和处理。",{"type":18,"tag":30,"props":245,"children":246},{},[247],{"type":23,"value":248},"2.卷积神经网络(Convolutional Neural Network)：卷积神经网络是一种专门用于图像处理和计算机视觉任务的神经网络类型。它通过卷积和池化等操作，可以提取图像中的特征，从而实现图像分类、目标检测、图像分割等任务。",{"type":18,"tag":30,"props":250,"children":251},{},[252],{"type":23,"value":253},"3.循环神经网络(Recurrent Neural Network)：循环神经网络是一种能够处理序列数据的神经网络类型。它通过记忆单元和门控机制等方式，可以处理任意长度的序列数据，从而实现自然语言处理、语音识别等任务。",{"type":18,"tag":30,"props":255,"children":256},{},[257],{"type":23,"value":258},"4.自编码器(Autoencoder)：自编码器是一种无监督学习的神经网络类型，它的目标是将输入数据进行压缩和解压缩，从而实现特征提取和降维等任务。",{"type":18,"tag":30,"props":260,"children":261},{},[262],{"type":23,"value":263},"5.深度置信网络(Deep Belief 
Network)：深度置信网络是一种由多个受限玻尔兹曼机组成的神经网络类型。它可以通过逐层贪心预训练和微调等方式，实现高效的特征学习和分类任务。",{"type":18,"tag":30,"props":265,"children":266},{},[267],{"type":23,"value":268},"除了以上列举的几种神经网络类型，还有众多其他的神经网络类型，如反向传播神经网络、Hopfield网络、Boltzmann机等。不同的神经网络类型适用于不同的任务和数据类型，需要根据具体的问题选择合适的神经网络类型。",{"type":18,"tag":30,"props":270,"children":271},{},[272],{"type":18,"tag":36,"props":273,"children":274},{},[275],{"type":23,"value":276},"神经网络组成",{"type":18,"tag":30,"props":278,"children":279},{},[280],{"type":23,"value":281},"人工神经网络（Artificial Neural Networks，简写为ANNs）是一种模仿动物神经网络行为特征，进行分布式并行信息处理的算法数学模型。这种网络依靠系统的复杂程度，通过调整内部大量节点之间相互连接的关系，从而达到处理信息的目的，并具有自学习和自适应的能力。神经网络类型众多，其中最为重要的是多层感知机。因此为了详细地描述神经网络，我们先从最简单的神经网络说起。",{"type":18,"tag":30,"props":283,"children":284},{},[285],{"type":23,"value":286},"人工神经网络由神经元模型构成，这种由许多神经元组成的信息处理网络具有并行分布结构。",{"type":18,"tag":30,"props":288,"children":289},{},[290],{"type":18,"tag":137,"props":291,"children":293},{"alt":7,"src":292},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/b8d6132baa2e4c46b157675501cdfc63.png",[],{"type":18,"tag":295,"props":296,"children":298},"pre",{"code":297},"其中圆形节点表示一个神经元，方形节点表示一组神经元。\n",[299],{"type":18,"tag":300,"props":301,"children":302},"code",{"__ignoreMap":7},[303],{"type":23,"value":297},{"type":18,"tag":155,"props":305,"children":307},{"id":306},"项目地址",[308],{"type":23,"value":306},{"type":18,"tag":67,"props":310,"children":311},{},[312],{"type":18,"tag":71,"props":313,"children":314},{},[315,317],{"type":23,"value":316},"模型仓库：",{"type":18,"tag":318,"props":319,"children":323},"a",{"href":320,"rel":321},"https://xihe.mindspore.cn/projects/hepucuncao/DeepLearning",[322],"nofollow",[324],{"type":23,"value":325},"MindSpore/hepucuncao/DeepLearning",{"type":18,"tag":155,"props":327,"children":328},{"id":85},[329],{"type":23,"value":85},{"type":18,"tag":30,"props":331,"children":332},{},[333],{"type":23,"value":334},"项目的目录分为两个部分：学习笔记readme文档，以及其中一些模型的简单实现代码放在train文件夹下。",{"type":18,"tag":295,"props":336,"children":33
8},{"code":337}," ├── train    # 相关代码目录\n │  ├── MLP.py    # MLP的一个简单实现\n │  ├── CNN.py    # CNN的一个简单实现\n │  └── RNN.py    # RNN的一个简单实现\n │  └── GD.py     # 梯度下降法的一个简单案例\n └── README.md\n",[339],{"type":18,"tag":300,"props":340,"children":341},{"__ignoreMap":7},[342],{"type":23,"value":337},{"type":18,"tag":120,"props":344,"children":345},{},[],{"type":18,"tag":124,"props":347,"children":348},{"id":90},[349],{"type":23,"value":90},{"type":18,"tag":155,"props":351,"children":353},{"id":352},"多层感知机mlp",[354],{"type":23,"value":98},{"type":18,"tag":30,"props":356,"children":357},{},[358],{"type":18,"tag":36,"props":359,"children":360},{},[361],{"type":23,"value":362},"感知机",{"type":18,"tag":30,"props":364,"children":365},{},[366],{"type":23,"value":367},"多层感知机(MLP，Multilayer Perceptron)是最简单的深度学习模型，属于前馈神经网络的一种，由多个全连接的神经网络层组成，适用于解决分类和回归问题，也叫人工神经网络(ANN,Artificial Neural Network)。简单的感知机如下图所示：",{"type":18,"tag":30,"props":369,"children":370},{},[371],{"type":18,"tag":137,"props":372,"children":374},{"alt":7,"src":373},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/62098625b3d640479c20af054521119c.png",[],{"type":18,"tag":30,"props":376,"children":377},{},[378],{"type":23,"value":379},"其中x1,x2,...,xn为感知机的输入，ω的计算与输入是无关的，相当于一个偏置。",{"type":18,"tag":30,"props":381,"children":382},{},[383],{"type":23,"value":384},"除了输入输出层，它中间可以有多个隐层，最简单的MLP只含一个隐层，即三层的结构，且层与层之间是全连接的，如下图：",{"type":18,"tag":30,"props":386,"children":387},{},[388],{"type":18,"tag":137,"props":389,"children":391},{"alt":7,"src":390},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/24ff56126f3f477f9baecea1ff82302f.png",[],{"type":18,"tag":30,"props":393,"children":394},{},[395],{"type":18,"tag":36,"props":396,"children":397},{},[398],{"type":23,"value":399},"多层感知机",{"type":18,"tag":30,"props":401,"children":402},{},[403],{"type":23,"value":404},"多层感知机由感知机推广而来，最主要的特点是有多个神经元层，因此也叫深度神经网络。相比于单独的感知机，多层感知机的第i层的每个神经元和第i−1、i+1层的每个神经元都有连接。且输出层可以不止有1个神经元。隐藏层可以只有1层，也可以有多层。输
出层为多个神经元的神经网络例如下图所示：",{"type":18,"tag":30,"props":406,"children":407},{},[408],{"type":18,"tag":137,"props":409,"children":411},{"alt":7,"src":410},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/e05f46a30e7e4a7b83d278fa499ee628.png",[],{"type":18,"tag":30,"props":413,"children":414},{},[415],{"type":23,"value":416},"隐藏层与输入层是全连接的，假设输入层用向量X表示，则隐藏层的输出就是 f (ω1*X+b1)，ω1是权重(也叫连接系数)，权重越高说明这个特征越重要，b1是偏置，函数f可以是常用的sigmoid函数(也称Logistic函数)或者tanh函数:",{"type":18,"tag":30,"props":418,"children":419},{},[420,424,426],{"type":18,"tag":137,"props":421,"children":423},{"alt":7,"src":422},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/a195967e5edc4452b62ef890713f3141.png",[],{"type":23,"value":425}," ",{"type":18,"tag":137,"props":427,"children":429},{"alt":7,"src":428},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/01a0a976b94f48d9bdfef206cb21912b.png",[],{"type":18,"tag":30,"props":431,"children":432},{},[433],{"type":23,"value":434},"隐藏层到输出层可以看成是一个多类别的逻辑回归，也即softmax回归，所以输出层的输出就是softmax(ω2*X1+b2)，可以将输出结果正规化处理，这同样也是通过矩阵运算进行的，其中X1表示隐藏层的输出f(ω1*X+b1)。",{"type":18,"tag":30,"props":436,"children":437},{},[438],{"type":18,"tag":36,"props":439,"children":440},{},[441],{"type":23,"value":442},"输出的正规化",{"type":18,"tag":30,"props":444,"children":445},{},[446],{"type":23,"value":447},"我们可以利用以下公式来将输出结果正规化，使得所有元素的和为1，而每个元素的值代表了概率值，此时的神经网络将变成如下图所示：",{"type":18,"tag":30,"props":449,"children":450},{},[451],{"type":18,"tag":137,"props":452,"children":454},{"alt":7,"src":453},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/f6cd34197aba40db8ff54b12d0464c93.png",[],{"type":18,"tag":30,"props":456,"children":457},{},[458],{"type":18,"tag":36,"props":459,"children":460},{},[461],{"type":23,"value":462},"激活层",{"type":18,"tag":30,"props":464,"children":465},{},[466],{"type":23,"value":467},"通过上述两个线性方程的计算，我们可以得到线性输出，接着要对神经网络注入灵魂：激活层。",{"type":18,"tag":30,"props":469,"children":470},{},[471],
{"type":23,"value":472},"激活层是神经网络中的一种层，其作用是在输入信号和输出信号之间添加一个非线性的转换函数，增加神经网络模型的非线性，使得网络可以更好地学习和表示复杂的非线性关系。激活层的意义在于增加模型的非线性表达能力，使得神经网络可以更好地处理复杂的输入数据，例如图像、文本和语音等。",{"type":18,"tag":30,"props":474,"children":475},{},[476],{"type":23,"value":477},"激活层常用的激活函数三种，分别是阶跃函数、Sigmoid和ReLU，如下图：",{"type":18,"tag":30,"props":479,"children":480},{},[481],{"type":18,"tag":137,"props":482,"children":484},{"alt":7,"src":483},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/668ca64f68bc4934be89be69b6156139.png",[],{"type":18,"tag":67,"props":486,"children":487},{},[488,493,498],{"type":18,"tag":71,"props":489,"children":490},{},[491],{"type":23,"value":492},"阶跃函数：当输入小于等于0时，输出0；当输入大于0时，输出1。",{"type":18,"tag":71,"props":494,"children":495},{},[496],{"type":23,"value":497},"Sigmoid：当输入趋近于正无穷/负无穷时，输出无限接近于1/0，这个函数可以把一个实数压缩到0-1之间。",{"type":18,"tag":71,"props":499,"children":500},{},[501,503],{"type":23,"value":502},"ReLU：当输入小于0时，输出0；当输入大于0时，输出等于输入。",{"type":18,"tag":295,"props":504,"children":506},{"code":505},"  
注意：每个隐藏层计算（矩阵线性运算）之后，都需要加一层激活层，要不然该层线性计算是没有意义的。此时的神经网络变成了如下图所示的形式：\n",[507],{"type":18,"tag":300,"props":508,"children":509},{"__ignoreMap":7},[510],{"type":23,"value":505},{"type":18,"tag":30,"props":512,"children":513},{},[514],{"type":18,"tag":137,"props":515,"children":517},{"alt":7,"src":516},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/120ac062b24b49f79d6987fa12d60236.png",[],{"type":18,"tag":295,"props":519,"children":521},{"code":520},"一些选择激活函数的经验法则：\n如果输出是0、1值(二分类问题)，则输出层选择sigmoid函数，然后其它的所有单元都选择Relu函数。这是很多激活函数的默认选择，如果在隐藏层上不确定使用哪个激活函数，那么通常会使用Relu激活函数。有时，也会使用tanh激活函数。\n",[522],{"type":18,"tag":300,"props":523,"children":524},{"__ignoreMap":7},[525],{"type":23,"value":520},{"type":18,"tag":30,"props":527,"children":528},{},[529],{"type":18,"tag":36,"props":530,"children":531},{},[532],{"type":23,"value":533},"交叉熵损失",{"type":18,"tag":30,"props":535,"children":536},{},[537],{"type":23,"value":538},"通过Softmax层的正规化处理后，我们可以得到I、II、III和IV这四个类别分别对应的概率，但这只是神经网络计算得到的而非真实情况。因此，我们需要将Softmax输出结果的好坏程度做一个“量化”，即将结果取对数的负数，这时概率越接近100%，该计算结果就越接近于0，说明结果越准确，我们把这个输出称为“交叉熵损失”。而我们训练神经网络的目的，就是尽可能减少“交叉熵损失”。",{"type":18,"tag":30,"props":540,"children":541},{},[542],{"type":18,"tag":36,"props":543,"children":544},{},[545],{"type":23,"value":546},"反向传播",{"type":18,"tag":30,"props":548,"children":549},{},[550],{"type":23,"value":551},"神经网络的计算主要有两种：前向传播（forward propagation, FP）作用于每一层的输入，通过逐层计算得到输出结果；反向传播（backward propagation, 
BP）作用于网络的输出，通过计算梯度由深到浅更新网络参数。",{"type":18,"tag":30,"props":553,"children":554},{},[555],{"type":23,"value":556},"上述过程就是神经网络的正向传播过程，用一句话来总结就是：神经网络的传播是形如Y=ω*X+b的线性矩阵运算，在隐藏层中加入激活层来给矩阵运算加入非线性，输出层的结果经过Softmax层处理为概率值，并通过交叉熵损失来量化当前神经网络的优劣。",{"type":18,"tag":30,"props":558,"children":559},{},[560],{"type":23,"value":561},"接着我们要进行的是反向传播，简而言之，反向传播是一个参数优化的过程，优化对象就是神经网络中的非确定参数ω和b。神经网络可以自动优化，通过反复迭代将输出的概率提高、交叉熵损失值下降，直到得到理想的参数值。",{"type":18,"tag":30,"props":563,"children":564},{},[565],{"type":23,"value":566},"那么，在反向传播过程中，很重要的一点就是：参数如何更新?或者说应该朝着什么方向更新?显然，参数应该是朝着目标损失函数下降最快的方向更新，即朝着梯度的反方向更新！",{"type":18,"tag":30,"props":568,"children":569},{},[570,572,577],{"type":23,"value":571},"在深度学习中，有三种最基本的梯度下降算法：",{"type":18,"tag":36,"props":573,"children":574},{},[575],{"type":23,"value":576},"SGD、BGD、MBGD",{"type":23,"value":578},"，它们各有优劣。根据不同的数据量和参数量，可以选择一种具体的实现形式，在训练神经网络时，优化算法大体可以分为两类：1）调整学习率，使得优化更稳定；2）梯度估计修正，优化训练速度。",{"type":18,"tag":67,"props":580,"children":581},{},[582,587,592],{"type":18,"tag":71,"props":583,"children":584},{},[585],{"type":23,"value":586},"随机梯度下降法 (Stochastic Gradient Descent,SGD)：每次迭代(更新参数)只使用单个训练样本",{"type":18,"tag":71,"props":588,"children":589},{},[590],{"type":23,"value":591},"批量梯度下降法 (Batch Gradient Descent,BGD)：每次迭代更新中使用所有的训练样本",{"type":18,"tag":71,"props":593,"children":594},{},[595],{"type":23,"value":596},"小批量梯度下降法 (Mini-Batch Gradient Descent,MBGD)：折中了 BGD 和 SGD 的方法，每次迭代使用 batch_size 
个训练样本进行计算(一般的mini-batch大小为64~256)",{"type":18,"tag":30,"props":598,"children":599},{},[600],{"type":23,"value":601},"接下来我们简单介绍一下在深度学习中应用十分广泛的梯度下降算法，它的主要目的是通过迭代找到目标函数的最小值，或者收敛到最小值。",{"type":18,"tag":30,"props":603,"children":604},{},[605],{"type":18,"tag":36,"props":606,"children":607},{},[608],{"type":23,"value":609},"梯度下降法",{"type":18,"tag":30,"props":611,"children":612},{},[613],{"type":23,"value":614},"梯度是微积分中一个重要的概念:",{"type":18,"tag":67,"props":616,"children":617},{},[618,623],{"type":18,"tag":71,"props":619,"children":620},{},[621],{"type":23,"value":622},"在单变量的函数中，梯度其实就是函数的微分，代表着函数在某个给定点的切线的斜率",{"type":18,"tag":71,"props":624,"children":625},{},[626],{"type":23,"value":627},"在多变量函数中，梯度是一个向量，向量有方向，梯度的方向就指出了函数在给定点的上升最快的方向(梯度的反方向就是函数在给定点下降最快的方向，这正是我们所需要的)",{"type":18,"tag":30,"props":629,"children":630},{},[631],{"type":23,"value":632},"数学公式如下：",{"type":18,"tag":30,"props":634,"children":635},{},[636],{"type":18,"tag":137,"props":637,"children":639},{"alt":7,"src":638},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/9683fdfb1994499991a82ca10d13a565.png",[],{"type":18,"tag":295,"props":641,"children":643},{"code":642},"此公式的意义是：J是关于Θ的一个函数，设当前所处的位置为Θ0点，要从这个点走到J的最小值点，也就是“山底”，首先先确定前进的方向，即梯度的反向，然后走一段距离的步长α(称为学习率或者步长)，走完这个段步长，就到达了Θ1这个点。\n注意：梯度前加一个负号，就意味着朝着梯度相反的方向前进，如果是梯度上升算法，当然就不需要添加负号了。\n",[644],{"type":18,"tag":300,"props":645,"children":646},{"__ignoreMap":7},[647],{"type":23,"value":642},{"type":18,"tag":30,"props":649,"children":650},{},[651],{"type":23,"value":652},"下面是用python实现的一个简单的梯度下降算法拟合直线的案例：",{"type":18,"tag":30,"props":654,"children":655},{},[656,658],{"type":23,"value":657},"代码地址：",{"type":18,"tag":318,"props":659,"children":662},{"href":660,"rel":661},"https://xihe.mindspore.cn/projects/hepucuncao/DeepLearning/blob/train/GD.py",[322],[663],{"type":23,"value":664},"MindSpore/hepucuncao/DeepLearning/train/GD.py",{"type":18,"tag":30,"props":666,"children":667},{},[668],{"type":23,"value":669},"至此，多层感知器(MLP)算法的内容我们就基本介绍完了，它们也是接下
来要讲的算法的基础。",{"type":18,"tag":30,"props":671,"children":672},{},[673],{"type":23,"value":674},"下面是用python实现的多层感知机(MLP)算法的一个简单实现：",{"type":18,"tag":30,"props":676,"children":677},{},[678,679],{"type":23,"value":657},{"type":18,"tag":318,"props":680,"children":683},{"href":681,"rel":682},"https://xihe.mindspore.cn/projects/hepucuncao/DeepLearning/blob/train/MLP.py",[322],[684],{"type":23,"value":685},"MindSpore/hepucuncao/DeepLearning/train/MLP.py",{"type":18,"tag":120,"props":687,"children":688},{},[],{"type":18,"tag":155,"props":690,"children":692},{"id":691},"卷积神经网络cnn",[693],{"type":23,"value":103},{"type":18,"tag":30,"props":695,"children":696},{},[697],{"type":23,"value":698},"卷积神经网络(Convolutional Neural Networks, CNN)是一类包含卷积计算且具有深度结构的前馈神经网络(Feedforward Neural Networks)，是深度学习的代表算法之一。目前CNN 已经得到了广泛的应用，CNN最擅长的就是图片的处理，它受到人类视觉神经系统的启发。",{"type":18,"tag":30,"props":700,"children":701},{},[702],{"type":18,"tag":36,"props":703,"children":704},{},[705],{"type":23,"value":706},"卷积神经网络的层级结构",{"type":18,"tag":30,"props":708,"children":709},{},[710],{"type":23,"value":711},"最左边是数据输入层： 
对数据做一些处理，CNN只对训练集去均值(把输入数据各个维度都中心化为0，避免数据过多偏差，影响训练效果)。",{"type":18,"tag":30,"props":713,"children":714},{},[715],{"type":23,"value":716},"中间是：",{"type":18,"tag":67,"props":718,"children":719},{},[720,725,730],{"type":18,"tag":71,"props":721,"children":722},{},[723],{"type":23,"value":724},"CONV：卷积计算层，即线性乘积求和，主要作用是保留图片的特征。**(核心)**",{"type":18,"tag":71,"props":726,"children":727},{},[728],{"type":23,"value":729},"RELU：激励层，ReLU是激活函数的一种。",{"type":18,"tag":71,"props":731,"children":732},{},[733],{"type":23,"value":734},"POOL：池化层，即取区域平均或最大，主要作用是把数据降维，可以有效地避免过拟合。",{"type":18,"tag":30,"props":736,"children":737},{},[738],{"type":23,"value":739},"最右边是",{"type":18,"tag":67,"props":741,"children":742},{},[743],{"type":18,"tag":71,"props":744,"children":745},{},[746],{"type":23,"value":747},"FC：全连接层，主要作用是根据不同任务输出我们想要的结果。",{"type":18,"tag":30,"props":749,"children":750},{},[751],{"type":18,"tag":137,"props":752,"children":754},{"alt":7,"src":753},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/1f481d0f8e4c4366b23f994137b7d300.png",[],{"type":18,"tag":30,"props":756,"children":757},{},[758],{"type":18,"tag":36,"props":759,"children":760},{},[761],{"type":23,"value":762},"卷积计算层",{"type":18,"tag":30,"props":764,"children":765},{},[766],{"type":23,"value":767},"通俗来说，将未知图案的局部和标准图案的局部一块一块地进行比对的计算过程，便是卷积操作，这个拿来比对的“小块”称之为Features(特征)。卷积计算结果为1表示匹配，否则不匹配。这个过程涉及到了一些数学操作，就是我们常说的“卷积”，因此我们先来了解一下什么是卷积。",{"type":18,"tag":30,"props":769,"children":770},{},[771],{"type":18,"tag":36,"props":772,"children":773},{},[774],{"type":23,"value":775},"什么是卷积",{"type":18,"tag":30,"props":777,"children":778},{},[779],{"type":23,"value":780},"对图像(不同的数据窗口数据)和滤波矩阵(一组固定的权重：因为每个神经元的多个权重固定，所以又可以看做一个恒定的滤波器filter)做内积(逐个元素相乘再求和)的操作就是所谓的『卷积』操作，这也是卷积神经网络的名字来源。",{"type":18,"tag":30,"props":782,"children":783},{},[784],{"type":18,"tag":137,"props":785,"children":787},{"alt":7,"src":786},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/2a63e50e12dd4a568b528727fd36be4
0.png",[],{"type":18,"tag":30,"props":789,"children":790},{},[791],{"type":23,"value":792},"上图中红框框起来的部分可以理解为一个滤波器，即带着一组固定权重的神经元，不同的滤波器filter会得到不同的输出数据，多个滤波器叠加便成了卷积层，用来提取图像的不同特征。在CNN中，滤波器filter对局部输入数据进行卷积计算，每计算完一个数据窗口内的局部数据后，数据窗口不断平移滑动，直到计算完所有数据。这个过程中有以下参数：",{"type":18,"tag":30,"props":794,"children":795},{},[796],{"type":23,"value":797},"　　a. 深度(depth)：神经元个数，决定输出的depth厚度。同时代表滤波器个数。",{"type":18,"tag":30,"props":799,"children":800},{},[801],{"type":23,"value":802},"　　b. 步长(stride)：决定滑动多少步可以到边缘。",{"type":18,"tag":30,"props":804,"children":805},{},[806],{"type":23,"value":807},"　　c. 填充值(zero-padding)：在外围边缘补充若干圈0，方便从初始位置以步长为单位可以刚好滑到末尾位置，通俗地讲就是为了总长能被步长整除 (一定程度上也可以弥补边界特征利用不充分的问题)。有三种填充方式：全零填充padding='same'、不填充padding='valid'、自定义填充padding=[[0,0],[上,下],[左,右],[0,0]]。",{"type":18,"tag":30,"props":809,"children":810},{},[811,813,818,820,825],{"type":23,"value":812},"数据窗口变化的过程中，每次滤波器都是针对某一局部的数据窗口进行卷积，这就是所谓的CNN中的",{"type":18,"tag":36,"props":814,"children":815},{},[816],{"type":23,"value":817},"局部感知机制",{"type":23,"value":819},"。与此同时，中间滤波器Filter的权重是固定不变的，这就是CNN中的",{"type":18,"tag":36,"props":821,"children":822},{},[823],{"type":23,"value":824},"参数(权重)共享机制",{"type":23,"value":826},"，这是卷积层最主要的两个特征。",{"type":18,"tag":295,"props":828,"children":830},{"code":829},"1.通过卷积操作实现局部连接，这个局部区域的大小就是滤波器filter，避免了全连接中参数过多造成无法计算的情况。\n2.再通过参数共享来缩减实际参数的数量，为实现多层网络提供了可能。\n",[831],{"type":18,"tag":300,"props":832,"children":833},{"__ignoreMap":7},[834],{"type":23,"value":829},{"type":18,"tag":30,"props":836,"children":837},{},[838],{"type":23,"value":839},"下面我们来分析一下具体的计算过程，如下图所示：",{"type":18,"tag":30,"props":841,"children":842},{},[843],{"type":18,"tag":137,"props":844,"children":846},{"alt":7,"src":845},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/e516ecc9e80c40bb97d56554ce4f55e7.png",[],{"type":18,"tag":30,"props":848,"children":849},{},[850],{"type":23,"value":851},"类似ω*X+b，其中ω对应滤波器Filter w0，X对应不同的数据窗口，b对应Bias b0，简单来说就是滤波器Filter w0与一个个数据窗口相乘再求和后，最后加上Bias 
b0得到输出结果。然后滤波器Filter w0固定不变，数据窗口向右移动步长stride个单位，继续做内积计算，重复这个过程。",{"type":18,"tag":295,"props":853,"children":855},{"code":854},"    注意：这里并不同于矩阵中的乘法，而是对应位置相乘后再求和。上图展示的是**多通道输入，单卷积核**的卷积操作(但是一个卷积核可以有多个通道,默认情况下，卷积核的通道数等于输入图片的通道数)。除此之外，还有**单通道输入，单卷积核**和**多通道输入，多卷积核**两种卷积操作，其中多通道输入、多卷积核是深度神经网络中间最常见的形式。\n    总结：输出的通道数=卷积核的个数   卷积核的通道数=输入的通道数   偏置数=卷积核数\n",[856],{"type":18,"tag":300,"props":857,"children":858},{"__ignoreMap":7},[859],{"type":23,"value":854},{"type":18,"tag":30,"props":861,"children":862},{},[863],{"type":18,"tag":36,"props":864,"children":865},{},[866],{"type":23,"value":867},"ReLU激励层",{"type":18,"tag":30,"props":869,"children":870},{},[871],{"type":23,"value":872},"在MLP算法中我们介绍了sigmoid函数，但在实际梯度下降的过程中，sigmoid函数容易饱和，造成梯度传递终止。因此我们可以使用另一个激活函数：ReLU，它的优点是收敛快且求梯度比较简单，其图形表示如下图所示：",{"type":18,"tag":30,"props":874,"children":875},{},[876],{"type":18,"tag":137,"props":877,"children":879},{"alt":7,"src":878},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/3b3fdbf16b084004a9d6fee6e7d3875f.png",[],{"type":18,"tag":30,"props":881,"children":882},{},[883],{"type":18,"tag":36,"props":884,"children":885},{},[886],{"type":23,"value":887},"池化pool层",{"type":18,"tag":30,"props":889,"children":890},{},[891],{"type":23,"value":892},"在卷积层中，可以通过调节步长参数实现特征图的高宽成倍缩小，从而降低了网络的参数量。实际上，除了通过设置步长，还有一种专门的网络层可以实现尺寸缩减功能，它就是我们要介绍的池化层(Pooling layer)。通常我们用到两种池化进行下采样：(1)**最大池化(Max Pooling)**，从局部相关元素集中选取最大的一个元素值;(2)**平均池化(Average 
Pooling)**，从局部相关元素集中计算平均值并返回。如下图展示的就是最大池化的操作：",{"type":18,"tag":30,"props":894,"children":895},{},[896],{"type":18,"tag":137,"props":897,"children":899},{"alt":7,"src":898},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/f1d4cfebcdab4d358d1dd15977a66a97.png",[],{"type":18,"tag":30,"props":901,"children":902},{},[903],{"type":18,"tag":36,"props":904,"children":905},{},[906],{"type":23,"value":907},"全连接层FC",{"type":18,"tag":30,"props":909,"children":910},{},[911],{"type":23,"value":912},"之所以叫做全连接，是因为每个神经元与前后相邻层的每一个神经元都有连接关系。如下图所示，这是一个简单的两层全连接网络，输入特征，输出的是预测的结果。而实际应用中一般不会将原始图片直接喂入全连接网络，会先对原始图像进行卷积特征提取，把提取到的特征喂给全连接网络，再让全连接网络计算出分类评估值。",{"type":18,"tag":30,"props":914,"children":915},{},[916],{"type":18,"tag":137,"props":917,"children":919},{"alt":7,"src":918},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/6fd6f352b8e84ecc8e47b9d02e759f2a.png",[],{"type":18,"tag":30,"props":921,"children":922},{},[923,925],{"type":23,"value":924},"全连接层的参数量是可以直接计算的，计算公式为：",{"type":18,"tag":36,"props":926,"children":927},{},[928],{"type":23,"value":929},"参数个数=∑(前层*后层+后层)",{"type":18,"tag":30,"props":931,"children":932},{},[933],{"type":23,"value":934},"和多层神经网络一样，卷积神经网络中的参数训练也是使用误差反向传播算法。",{"type":18,"tag":30,"props":936,"children":937},{},[938],{"type":23,"value":939},"至此，我们介绍完了深度学习中的一个经典算法--卷积神经网络模型(CNN)，接下来我们要介绍的是另一种也非常经典的算法--循环神经网络模型(RNN)。",{"type":18,"tag":30,"props":941,"children":942},{},[943],{"type":23,"value":944},"下面是用python实现的卷积神经网络(CNN)算法的一个简单实现：",{"type":18,"tag":30,"props":946,"children":947},{},[948,949],{"type":23,"value":657},{"type":18,"tag":318,"props":950,"children":953},{"href":951,"rel":952},"https://xihe.mindspore.cn/projects/hepucuncao/DeepLearning/blob/train/CNN.py",[322],[954],{"type":23,"value":955},"MindSpore/hepucuncao/DeepLearning/train/CNN.py",{"type":18,"tag":120,"props":957,"children":958},{},[],{"type":18,"tag":155,"props":960,"children":962},{"id":961},"循环神经网络rnn",[963],{"type":23,"value":108},{"typ
e":18,"tag":30,"props":965,"children":966},{},[967],{"type":18,"tag":36,"props":968,"children":969},{},[970],{"type":23,"value":971},"什么是RNNs",{"type":18,"tag":30,"props":973,"children":974},{},[975],{"type":23,"value":976},"在传统的神经网络模型中，是从输入层到隐含层再到输出层，层与层之间是全连接的，每层之间的节点是无连接的，但是这种普通的神经网络对于很多问题却无能无力。RNNs之所以称为循环神经网络，是因为一个序列当前的输出与前面的输出也有关。具体的表现形式为网络会对前面的信息进行记忆并应用于当前输出的计算中，即隐藏层之间的节点不再无连接而是有连接的，并且隐藏层的输入不仅包括输入层的输出还包括上一时刻隐藏层的输出。下图是一个典型的RNNs：",{"type":18,"tag":30,"props":978,"children":979},{},[980],{"type":18,"tag":137,"props":981,"children":983},{"alt":7,"src":982},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/0211d02bdee84a1f826a60461ca64abf.png",[],{"type":18,"tag":295,"props":985,"children":987},{"code":986},"从图中可以看出，有一条单向流动的信息流是从输入单元到达隐藏单元的，与此同时另一条单向流动的信息流从隐藏单元到达输出单元。在某些情况下，RNNs也会引导信息从输出单元返回隐藏单元，称为“Back Projections”，并且隐藏层的输入还包括上一隐藏层的状态，即隐藏层内的节点可以自连也可以互连。\n",[988],{"type":18,"tag":300,"props":989,"children":990},{"__ignoreMap":7},[991],{"type":23,"value":986},{"type":18,"tag":30,"props":993,"children":994},{},[995,997,1002],{"type":23,"value":996},"上图中每一步都会有输出，但是并不是每一步都要有输出，当然也不是每步都需要输入。RNNs的关键之处在于",{"type":18,"tag":36,"props":998,"children":999},{},[1000],{"type":23,"value":1001},"隐藏层",{"type":23,"value":1003},"，隐藏层能够捕捉序列的信息。",{"type":18,"tag":30,"props":1005,"children":1006},{},[1007],{"type":23,"value":1008},"下面是一些常见的RNNs模型：",{"type":18,"tag":30,"props":1010,"children":1011},{},[1012],{"type":18,"tag":36,"props":1013,"children":1014},{},[1015],{"type":23,"value":1016},"Simple 
RNNs(SRNs)",{"type":18,"tag":30,"props":1018,"children":1019},{},[1020],{"type":23,"value":1021},"SRNs是RNNs的一种特例，它是一个三层网络，并且在隐藏层增加了上下文单元。上下文单元节点与隐藏层中的节点的连接是固定的，并且权值也是固定的。在每一步中，使用标准的前向反馈进行传播，然后使用学习算法进行学习。上下文每一个节点保存其连接的隐藏层节点的上一步的输出，即保存上文，并作用于当前步对应的隐藏层节点的状态，即隐藏层的输入由输入层的输出与上一步的自己的状态所决定的，因此SRNs能够解决标准的多层感知机(MLP)无法解决的对序列数据进行预测的任务。SRNs网络结构如下图所示：",{"type":18,"tag":30,"props":1023,"children":1024},{},[1025],{"type":18,"tag":137,"props":1026,"children":1028},{"alt":7,"src":1027},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/7fad1777666c4ec1abc479f15a9c307d.png",[],{"type":18,"tag":30,"props":1030,"children":1031},{},[1032],{"type":18,"tag":36,"props":1033,"children":1034},{},[1035],{"type":23,"value":1036},"Bidirectional RNNs",{"type":18,"tag":30,"props":1038,"children":1039},{},[1040],{"type":23,"value":1041},"Bidirectional RNNs(双向网络)的改进之处便是，假设当前的输出(第步的输出)不仅仅与前面的序列有关，并且还与后面的序列有关。例如：预测一个语句中缺失的词语那么就需要根据上下文来进行预测。Bidirectional RNNs是一个相对较简单的RNNs，是由两个RNNs上下叠加在一起组成的,输出由这两个RNNs的隐藏层的状态决定的。如下图所示：",{"type":18,"tag":30,"props":1043,"children":1044},{},[1045],{"type":18,"tag":137,"props":1046,"children":1048},{"alt":7,"src":1047},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/72f463c2f3084282a2b624c9fc1d0f1c.png",[],{"type":18,"tag":30,"props":1050,"children":1051},{},[1052],{"type":18,"tag":36,"props":1053,"children":1054},{},[1055],{"type":23,"value":1056},"Deep(Bidirectional)RNNs",{"type":18,"tag":30,"props":1058,"children":1059},{},[1060],{"type":23,"value":1061},"Deep(Bidirectional)RNNs与Bidirectional 
RNNs相似，只是对于每一步的输入有多层网络，使该网络有更强大的表达与学习能力，但是复杂性也提高了，同时需要更多的训练数据。Deep(Bidirectional)RNNs的结构如下图所示：",{"type":18,"tag":30,"props":1063,"children":1064},{},[1065],{"type":18,"tag":137,"props":1066,"children":1068},{"alt":7,"src":1067},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/5414f15c92b34d19a40468a15a952a64.png",[],{"type":18,"tag":30,"props":1070,"children":1071},{},[1072],{"type":18,"tag":36,"props":1073,"children":1074},{},[1075],{"type":23,"value":1076},"Echo State Networks(ESNs)",{"type":18,"tag":30,"props":1078,"children":1079},{},[1080],{"type":23,"value":1081},"ESNs(回声状态网络)也是一种RNNs，但是它与传统的RNNs相差很大。ESNs具有以下三个特点：",{"type":18,"tag":67,"props":1083,"children":1084},{},[1085,1090,1095],{"type":18,"tag":71,"props":1086,"children":1087},{},[1088],{"type":23,"value":1089},"它的核心结构时一个随机生成、且保持不变的储备池(Reservoir)，储备池是大规模的、随机生成的、稀疏连接(SD通常保持1%～5%，SD表示储备池中互相连接的神经元占总的神经元个数N的比例)的循环结构；",{"type":18,"tag":71,"props":1091,"children":1092},{},[1093],{"type":23,"value":1094},"其储备池到输出层的权值矩阵是唯一需要调整的部分；",{"type":18,"tag":71,"props":1096,"children":1097},{},[1098],{"type":23,"value":1099},"简单的线性回归就可完成网络的训练。",{"type":18,"tag":30,"props":1101,"children":1102},{},[1103],{"type":23,"value":1104},"从结构上讲，ESNs是一种特殊类型的循环神经网络，其基本思想是：使用大规模随机连接的循环网络取代经典神经网络中的中间层，从而简化网络的训练过程。因此ESNs的关键是中间的储备池。网络中的参数包括：为储备池中节点的连接权值矩阵，为输入层到储备池之间的连接权值矩阵，表明储备池中的神经元之间是连接的，为输出层到储备池之间的反馈连接权值矩阵，表明储备池会有输出层来的反馈，为输入层、储备池、输出层到输出层的连接权值矩阵，表明输出层不仅与储备池连接，还与输入层和自己连接。表示输出层的偏置项。",{"type":18,"tag":30,"props":1106,"children":1107},{},[1108],{"type":23,"value":1109},"以下是ESNs储备池的四个参数：",{"type":18,"tag":67,"props":1111,"children":1112},{},[1113,1118,1123,1128],{"type":18,"tag":71,"props":1114,"children":1115},{},[1116],{"type":23,"value":1117},"储备池内部连接权谱半径SR(只有SR 
\u003C1时，ESNs才能具有回声状态属性)",{"type":18,"tag":71,"props":1119,"children":1120},{},[1121],{"type":23,"value":1122},"储备池规模N(即储备池中神经元的个数)",{"type":18,"tag":71,"props":1124,"children":1125},{},[1126],{"type":23,"value":1127},"储备池输入单元尺度IS(IS为储备池的输入信号连接到储备池内部神经元之前需要相乘的一个尺度因子)",{"type":18,"tag":71,"props":1129,"children":1130},{},[1131],{"type":23,"value":1132},"储备池稀疏程度SD(即为储备池中互相连接的神经元个数占储备池神经元总个数的比例)",{"type":18,"tag":30,"props":1134,"children":1135},{},[1136],{"type":23,"value":1137},"对于IS，如果需要处理的任务的非线性越强，那么输入单元尺度就越大。该原则的本质就是通过输入单元尺度IS，将输入变换到神经元激活函数相应的范围(神经元激活函数的不同输入范围，其非线性程度不同)。ESNs的结构如下图所示：",{"type":18,"tag":30,"props":1139,"children":1140},{},[1141],{"type":18,"tag":137,"props":1142,"children":1144},{"alt":7,"src":1143},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/4eb81ccc56d64a4790c70577720669de.png",[],{"type":18,"tag":30,"props":1146,"children":1147},{},[1148],{"type":18,"tag":36,"props":1149,"children":1150},{},[1151],{"type":23,"value":1152},"Gated Recurrent Unit Recurrent Neural Networks(GRUs)",{"type":18,"tag":30,"props":1154,"children":1155},{},[1156],{"type":23,"value":1157},"GRUs主要是从以下两个方面进行改进:一是，序列中不同的位置处的输入对当前的隐藏层的状态的影响不同，越前面的影响越小，即每个前面状态对当前的影响进行了距离加权，距离越远，权值越小；二是，在产生误差error时，误差可能是由某一个或者几个输入而引发的，所以应当仅仅对对应的输入weight进行更新。GRUs的结构如下图所示：",{"type":18,"tag":30,"props":1159,"children":1160},{},[1161],{"type":18,"tag":137,"props":1162,"children":1164},{"alt":7,"src":1163},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/e365210f36574ec6896acbbb9de6b356.png",[],{"type":18,"tag":295,"props":1166,"children":1168},{"code":1167},"GRUs首先根据当前输入以及前一个隐藏层的状态hidden state计算出update gate和reset gate，再根据reset gate、当前输入以及前一个hidden state计算新的记忆单元内容(new memory content)。当reset gate为0的时候，new memory content忽略之前的所有memory content，最终的memory是之前的hidden state与new memory 
content的结合。\n",[1169],{"type":18,"tag":300,"props":1170,"children":1171},{"__ignoreMap":7},[1172],{"type":23,"value":1167},{"type":18,"tag":30,"props":1174,"children":1175},{},[1176],{"type":18,"tag":36,"props":1177,"children":1178},{},[1179],{"type":23,"value":1180},"LSTM Networks(LSTMs)",{"type":18,"tag":30,"props":1182,"children":1183},{},[1184],{"type":23,"value":1185},"LSTMs与GRUs类似，与一般的RNNs结构本质上并没有什么不同，只是使用了不同的函数来计算隐藏层的状态。在LSTMs中，i结构被称为cells，可以把cells看作是黑盒用以保存当前输入之前保存的状态，这些cells决定哪些cell抑制哪些cell兴奋，它们结合前面的状态、当前的记忆以及当前的输入。已经证明，该网络结构在对长序列依赖问题中非常有效，LSTMs的网络结构如下图所示：",{"type":18,"tag":30,"props":1187,"children":1188},{},[1189],{"type":18,"tag":137,"props":1190,"children":1192},{"alt":7,"src":1191},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/32276b3488044880adc40c3677bcf51a.png",[],{"type":18,"tag":30,"props":1194,"children":1195},{},[1196],{"type":18,"tag":36,"props":1197,"children":1198},{},[1199],{"type":23,"value":1200},"Clockwork RNNs(CW-RNNs)",{"type":18,"tag":30,"props":1202,"children":1203},{},[1204],{"type":23,"value":1205},"CW-RNNs是一种使用时钟频率来驱动的RNNs。它将隐藏层分为几个块(组，Group/Module)，每一组按照自己规定的时钟频率对输入进行处理。并且为了降低标准的RNNs的复杂性，CW-RNNs减少了参数的数目，提高了网络性能，加速了网络的训练。CW-RNNs通过不同的隐藏层模块工作在不同的时钟频率下来解决长时间依赖问题。将时钟时间进行离散化，然后在不同的时间点，不同的隐藏层组在工作。因此，所有的隐藏层组在每一步不会都同时工作，这样便会加快网络的训练。并且，时钟周期小的组的神经元不会连接到时钟周期大的组的神经元，只会周期大的连接到周期小的(可以认为组与组之间的连接是有向的就好了，代表信息的传递是有向的)，周期大的速度慢，周期小的速度快，那么便是速度慢的连速度快的，反之则不成立。",{"type":18,"tag":30,"props":1207,"children":1208},{},[1209],{"type":23,"value":1210},"CW-RNNs与SRNs网络结构类似，也包括输入层(Input)、隐藏层(Hidden)、输出层(Output)，它们之间也有向前连接，即输入层到隐藏层的连接，隐藏层到输出层的连接。但是与SRN不同的是，隐藏层中的神经元会被划分为若干个组，每一组中的神经元个数相同，并为每一个组分配一个时钟周期，每一个组中的所有神经元都是全连接。CW-RNNs的网络结构如下图所示：",{"type":18,"tag":30,"props":1212,"children":1213},{},[1214],{"type":18,"tag":137,"props":1215,"children":1217},{"alt":7,"src":1216},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/0d97986abf5844e0b64a0933916af280.png",[],{"type":18,"tag":295,"props":1219,"children":1221}
,{"code":1220},"如上图所示，将这些组按照时钟周期递增从左到右进行排序，那么连接便是从右到左。例如：隐藏层共有256个节点，分为四组，周期分别是[1,2,4,8]，那么每个隐藏层组256/4=64个节点，第一组隐藏层与隐藏层的连接矩阵为64*64的矩阵，第二组的矩阵则为64*128矩阵，第三组为64*(3*64)=64*192矩阵，第四组为64*(4*64)=64*256矩阵。这就解释了上述为什么说，速度慢的组连到速度快的组，反之则不成立。\n",[1222],{"type":18,"tag":300,"props":1223,"children":1224},{"__ignoreMap":7},[1225],{"type":23,"value":1220},{"type":18,"tag":30,"props":1227,"children":1228},{},[1229],{"type":23,"value":1230},"下面是用python实现的循环神经网络(RNN)算法的一个简单实现：",{"type":18,"tag":30,"props":1232,"children":1233},{},[1234,1235],{"type":23,"value":657},{"type":18,"tag":318,"props":1236,"children":1239},{"href":1237,"rel":1238},"https://xihe.mindspore.cn/projects/hepucuncao/DeepLearning/blob/train/RNN.py",[322],[1240],{"type":23,"value":1241},"MindSpore/hepucuncao/DeepLearning/train/RNN.py",{"type":18,"tag":30,"props":1243,"children":1244},{},[1245],{"type":23,"value":1246},"RNN在处理长期依赖(时间序列上距离较远的节点)时会遇到巨大困难，因为计算距离较远的节点之间的联系时会涉及雅可比矩阵的多次相乘，会造成梯度消失或者梯度膨胀的现象。为了解决该问题，研究人员提出了许多改进方法，其中最成功、应用最广泛的就是门限RNN(Gated RNN)，而LSTM就是门限RNN中最著名的一种,接下来我们介绍LSTM长短期记忆神经网络。",{"type":18,"tag":120,"props":1248,"children":1249},{},[],{"type":18,"tag":155,"props":1251,"children":1253},{"id":1252},"lstm长短期记忆神经网络",[1254],{"type":23,"value":113},{"type":18,"tag":30,"props":1256,"children":1257},{},[1258],{"type":18,"tag":36,"props":1259,"children":1260},{},[1261],{"type":23,"value":1262},"RNN和LSTM的区别",{"type":18,"tag":30,"props":1264,"children":1265},{},[1266],{"type":23,"value":1267},"所有 RNN 都具有一种重复神经网络模块的链式的形式。在标准的 RNN 中，这个重复的模块只有一个非常简单的结构，例如一个 tanh 层，如下图所示：",{"type":18,"tag":30,"props":1269,"children":1270},{},[1271],{"type":18,"tag":137,"props":1272,"children":1274},{"alt":7,"src":1273},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/5f64a88aa93a49029357674a39383d2d.png",[],{"type":18,"tag":30,"props":1276,"children":1277},{},[1278],{"type":23,"value":1279},"LSTM 
同样是这样的结构，但是重复的模块拥有一个不同的结构。不同于单一神经网络层，这里是有四个，以一种非常特殊的方式进行交互。",{"type":18,"tag":30,"props":1281,"children":1282},{},[1283],{"type":18,"tag":137,"props":1284,"children":1286},{"alt":7,"src":1285},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/7e3f1e03f63b42019b4042f0245acee9.png",[],{"type":18,"tag":30,"props":1288,"children":1289},{},[1290],{"type":18,"tag":36,"props":1291,"children":1292},{},[1293],{"type":23,"value":1294},"LSTM核心",{"type":18,"tag":30,"props":1296,"children":1297},{},[1298],{"type":23,"value":1299},"LSTM 有统称为“门”的结构来去除或增加信息到细胞状态的能力。门是一种让信息选择式通过的方法，它包含一个sigmoid神经网络层和一个pointwise乘法操作。示意图如下：",{"type":18,"tag":30,"props":1301,"children":1302},{},[1303],{"type":18,"tag":137,"props":1304,"children":1306},{"alt":7,"src":1305},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/56498dac2c7e48c084e3986fa2601bd8.png",[],{"type":18,"tag":30,"props":1308,"children":1309},{},[1310],{"type":23,"value":1311},"LSTM 拥有三个门，分别是忘记层门，输入层门和输出层门，用来保护和控制细胞状态。",{"type":18,"tag":30,"props":1313,"children":1314},{},[1315],{"type":18,"tag":36,"props":1316,"children":1317},{},[1318],{"type":23,"value":1319},"忘记层门",{"type":18,"tag":30,"props":1321,"children":1322},{},[1323],{"type":23,"value":1324},"作用对象：细胞状态",{"type":18,"tag":30,"props":1326,"children":1327},{},[1328],{"type":23,"value":1329},"作用：将细胞状态中的信息选择性遗忘",{"type":18,"tag":30,"props":1331,"children":1332},{},[1333],{"type":23,"value":1334},"操作步骤：该门会读取ht−1和xt，输出一个在 0 到 1 
之间的数值给每个在细胞状态Ct−1中的数字。1表示“完全保留”，0表示“完全舍弃”。示意图如下：",{"type":18,"tag":30,"props":1336,"children":1337},{},[1338],{"type":18,"tag":137,"props":1339,"children":1341},{"alt":7,"src":1340},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/4d6ed2f46a5c4dce97c5bd5418fa3160.png",[],{"type":18,"tag":30,"props":1343,"children":1344},{},[1345],{"type":18,"tag":36,"props":1346,"children":1347},{},[1348],{"type":23,"value":1349},"输入层门",{"type":18,"tag":30,"props":1351,"children":1352},{},[1353],{"type":23,"value":1324},{"type":18,"tag":30,"props":1355,"children":1356},{},[1357],{"type":23,"value":1358},"作用：将新的信息选择性的记录到细胞状态中",{"type":18,"tag":30,"props":1360,"children":1361},{},[1362],{"type":23,"value":1363},"操作步骤：",{"type":18,"tag":30,"props":1365,"children":1366},{},[1367],{"type":23,"value":1368},"步骤一，sigmoid层称“输入门层”决定什么值我们将要更新。",{"type":18,"tag":30,"props":1370,"children":1371},{},[1372],{"type":23,"value":1373},"步骤二，tanh 层创建一个新的候选值向量C̃t加入到状态中。其示意图如下：",{"type":18,"tag":30,"props":1375,"children":1376},{},[1377],{"type":18,"tag":137,"props":1378,"children":1380},{"alt":7,"src":1379},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/0d52863365234d919114448f19f6b7a4.png",[],{"type":18,"tag":30,"props":1382,"children":1383},{},[1384],{"type":23,"value":1385},"步骤三：将Ct−1更新为Ct。将旧状态与ft相乘，丢弃掉我们确定需要丢弃的信息。接着加上it∗C̃t，这就是新的候选值，根据我们决定更新每个状态的程度进行变化。其示意图如下：",{"type":18,"tag":30,"props":1387,"children":1388},{},[1389],{"type":18,"tag":137,"props":1390,"children":1392},{"alt":7,"src":1391},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/417e46a1cfe04af3a5f257eabad49d44.png",[],{"type":18,"tag":30,"props":1394,"children":1395},{},[1396],{"type":18,"tag":36,"props":1397,"children":1398},{},[1399],{"type":23,"value":1400},"输出层门",{"type":18,"tag":30,"props":1402,"children":1403},{},[1404],{"type":23,"value":1405},"作用对象：隐层ht",{"type":18,"tag":30,"props":1407,"children":1408},{},[1409],{"type":23,"value":1410},"作用：确定输出什么值"
,{"type":18,"tag":30,"props":1412,"children":1413},{},[1414],{"type":23,"value":1363},{"type":18,"tag":30,"props":1416,"children":1417},{},[1418],{"type":23,"value":1419},"步骤一：通过sigmoid层来确定细胞状态的哪个部分将输出。",{"type":18,"tag":30,"props":1421,"children":1422},{},[1423],{"type":23,"value":1424},"步骤二：把细胞状态通过tanh进行处理，并将它和sigmoid门的输出相乘，最终仅输出我们确定输出的那部分。其示意图如下所示：",{"type":18,"tag":30,"props":1426,"children":1427},{},[1428],{"type":18,"tag":137,"props":1429,"children":1431},{"alt":7,"src":1430},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/b0b39a96e5c44d769e3b02697bb4c320.png",[],{"type":18,"tag":155,"props":1433,"children":1435},{"id":1434},"强化学习rl",[1436],{"type":23,"value":118},{"type":18,"tag":30,"props":1438,"children":1439},{},[1440],{"type":23,"value":1441},"强化学习Reinforcement Learning (RL)属于机器学习的一种，不同于监督学习和无监督学习，它是通过智能体与环境的不断交互进而获得奖励，从而不断优化自身动作策略，以期待最大化其长期收益。强化学习适用于序贯决策问题(涉及一系列有序的决策问题)。",{"type":18,"tag":30,"props":1443,"children":1444},{},[1445],{"type":18,"tag":36,"props":1446,"children":1447},{},[1448],{"type":23,"value":1449},"强化学习的定义",{"type":18,"tag":30,"props":1451,"children":1452},{},[1453],{"type":23,"value":1454},"智能体与环境的不断交互(即在给定状态采取动作)，进而获得奖励，此时环境从一个状态转移到下一个状态。智能体通过不断优化自身动作策略，以期待最大化其长期回报或收益(奖励之和)。",{"type":18,"tag":30,"props":1456,"children":1457},{},[1458],{"type":18,"tag":137,"props":1459,"children":1461},{"alt":7,"src":1460},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/03cd319e8f0a4ec4bb869cde3af79e33.png",[],{"type":18,"tag":30,"props":1463,"children":1464},{},[1465],{"type":18,"tag":36,"props":1466,"children":1467},{},[1468],{"type":23,"value":1469},"深度强化学习",{"type":18,"tag":30,"props":1471,"children":1472},{},[1473],{"type":23,"value":1474},"Deep Learning(DL) + Reinforcement Learning(RL) = Deep Reinforcement 
Learning(DRL)",{"type":18,"tag":30,"props":1476,"children":1477},{},[1478],{"type":23,"value":1479},"深度学习DL有很强的抽象和表示能力，特别适合建模RL中的值函数，二者结合，极大地拓展了RL的应用范围。深度强化学习的算法比较多，常见的有：DQN，DDPG，PPO，TRPO，A3C，SAC等。",{"type":18,"tag":30,"props":1481,"children":1482},{},[1483],{"type":18,"tag":36,"props":1484,"children":1485},{},[1486],{"type":23,"value":1487},"Deep Q-Networks(DQN)算法",{"type":18,"tag":30,"props":1489,"children":1490},{},[1491],{"type":23,"value":1492},"DQN，即深度Q网络（Deep Q-network），是指基于深度学习的Q-Learning算法。",{"type":18,"tag":30,"props":1494,"children":1495},{},[1496],{"type":18,"tag":36,"props":1497,"children":1498},{},[1499],{"type":23,"value":1500},"1)DQN训练过程",{"type":18,"tag":30,"props":1502,"children":1503},{},[1504],{"type":23,"value":1505},"神经网络的输入是状态s，输出是对所有动作a的打分",{"type":18,"tag":30,"props":1507,"children":1508},{},[1509],{"type":18,"tag":137,"props":1510,"children":1512},{"alt":7,"src":1511},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/49ca13a89b90408680b7a19e72d98a28.png",[],{"type":18,"tag":30,"props":1514,"children":1515},{},[1516],{"type":23,"value":1517},"最原始的DQN算法具体过程如下：",{"type":18,"tag":30,"props":1519,"children":1520},{},[1521],{"type":18,"tag":137,"props":1522,"children":1524},{"alt":7,"src":1523},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/3eba8555c48e46178120d53b4645582f.png",[],{"type":18,"tag":30,"props":1526,"children":1527},{},[1528],{"type":18,"tag":137,"props":1529,"children":1531},{"alt":7,"src":1530},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/c15d168447a049d0ab5a941a85497a6a.png",[],{"type":18,"tag":30,"props":1533,"children":1534},{},[1535,1537,1542],{"type":23,"value":1536},"从上述过程我们可以知道，1、用完一个transition就丢弃，会造成对经验的浪费；2、按顺序使用transition时，前一个transition和后一个transition相关性很强，这种相关性对学习Q网络是有害的。因此，出现了",{"type":18,"tag":36,"props":1538,"children":1539},{},[1540],{"type":23,"value":1541},"经验回放",{"type":23,"value":1543},"，它可以克服上述两个缺点。",{"type":18,"tag":30,"prop
s":1545,"children":1546},{},[1547],{"type":18,"tag":36,"props":1548,"children":1549},{},[1550],{"type":23,"value":1551},"2)经验回放",{"type":18,"tag":30,"props":1553,"children":1554},{},[1555],{"type":23,"value":1556},"经验回放会构建一个回放缓冲区(replay buffer)，存储n条transition，称为经验。当某一个策略π与环境交互，收集很多条transition放入回放缓冲区，回放缓冲区中的经验transition可能来自不同的策略，且回放缓冲区只有在它装满的时候才会把旧的数据丢掉。",{"type":18,"tag":30,"props":1558,"children":1559},{},[1560],{"type":18,"tag":137,"props":1561,"children":1563},{"alt":7,"src":1562},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/3279b3f180464d158d92c98fe88bd787.png",[],{"type":18,"tag":30,"props":1565,"children":1566},{},[1567],{"type":23,"value":1568},"每次随机抽出一个batch大小的transition数据训练网络，算出多个随机梯度，用梯度的平均值来更新Q网络参数ω。",{"type":18,"tag":30,"props":1570,"children":1571},{},[1572],{"type":18,"tag":36,"props":1573,"children":1574},{},[1575],{"type":23,"value":1576},"3)目标网络",{"type":18,"tag":295,"props":1578,"children":1580},{"code":1579},"我们为什么要使用目标网络?\n我们在训练网络的时候，动作价值估计和权重w有关。当权重变化时，动作价值的估计也会发生变化。在学习的过程中，动作价值试图追逐一个变化的回报，容易出现不稳定的情况。\n",[1581],{"type":18,"tag":300,"props":1582,"children":1583},{"__ignoreMap":7},[1584],{"type":23,"value":1579},{"type":18,"tag":30,"props":1586,"children":1587},{},[1588,1590,1595],{"type":23,"value":1589},"我们使用另一个网络，称为目标网络:Q(s,a;w1)，网络结构和原来的网络Q(s,a;w)一样，只是参数不同w1 ≠ w，原来的网络称为",{"type":18,"tag":36,"props":1591,"children":1592},{},[1593],{"type":23,"value":1594},"评估网络",{"type":23,"value":1596},"。两个网络的作用不一样：评估网络Q(s,a;w)负责控制智能体，收集经验，而目标网络Q(s,a;w1)用于计算TD 
target。",{"type":18,"tag":30,"props":1598,"children":1599},{},[1600],{"type":23,"value":1601},"在更新过程中，我们只更新评估网络Q(s,a;w)的权重w，目标网络Q(s,a;w1)的权重w1保持不变。在更新一定次数后，再将更新过的评估网络的权重复制给目标网络，进行下一批更新，这样目标网络也能得到更新。由于在目标网络没有变化的一段时间内回报的目标值是相对固定的，因此目标网络的引入可以增加学习的稳定性。",{"type":18,"tag":30,"props":1603,"children":1604},{},[1605],{"type":18,"tag":137,"props":1606,"children":1608},{"alt":7,"src":1607},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/06/28/dbbc7148c29e48ae9f97db6f69e62e2b.png",[],{"type":18,"tag":30,"props":1610,"children":1611},{},[1612],{"type":18,"tag":36,"props":1613,"children":1614},{},[1615],{"type":23,"value":1616},"Deep Deterministic Policy Gradient(DDPG)算法",{"type":18,"tag":30,"props":1618,"children":1619},{},[1620],{"type":23,"value":1621},"DDPG算法可以看作Deterministic Policy Gradient(DPG)算法和深度神经网络的结合，是对上述深度Q网络(DQN)在连续动作空间的扩展，它在许多连续控制问题上取得了非常不错的效果。具体而言，DDPG算法主要包括以下三个关键技术：",{"type":18,"tag":30,"props":1623,"children":1624},{},[1625],{"type":18,"tag":36,"props":1626,"children":1627},{},[1628],{"type":23,"value":1629},"1)经验回放",{"type":18,"tag":30,"props":1631,"children":1632},{},[1633,1635,1640,1642,1647],{"type":23,"value":1634},"智能体将得到的经验数据(s,a,r,s′,done)放入Replay 
Buffer中，更新网络参数时按照批量采样,即",{"type":18,"tag":36,"props":1636,"children":1637},{},[1638],{"type":23,"value":1639},"存储",{"type":23,"value":1641},"和",{"type":18,"tag":36,"props":1643,"children":1644},{},[1645],{"type":23,"value":1646},"回放",{"type":23,"value":1648},":",{"type":18,"tag":67,"props":1650,"children":1651},{},[1652,1657],{"type":18,"tag":71,"props":1653,"children":1654},{},[1655],{"type":23,"value":1656},"存储:将经验以(s,a,r,s′,done)形式存储在经验池中(集中式回放or分布式回放)",{"type":18,"tag":71,"props":1658,"children":1659},{},[1660],{"type":23,"value":1661},"回放:按照某种规则从经验池中采样一条或多条经验数据(均匀回放or优先回放)",{"type":18,"tag":30,"props":1663,"children":1664},{},[1665],{"type":18,"tag":36,"props":1666,"children":1667},{},[1668],{"type":23,"value":1669},"2)目标网络",{"type":18,"tag":30,"props":1671,"children":1672},{},[1673],{"type":23,"value":1674},"在Actor网络和Critic网络外再使用一套用于估计目标的Target Actor网络和Target Critic网络。",{"type":18,"tag":30,"props":1676,"children":1677},{},[1678],{"type":23,"value":1679},"在更新目标网络时，为了避免参数更新过快，采用软更新方式，也可以称为指数移动平均(Exponential Moving Average, 
EMA)，即引入一个学习率(或者称为动量)τ，将旧的目标网络参数和新的对应网络参数做加权平均，然后赋值给目标网络。因此，目标网络的输出会更加稳定，利用目标网络来计算目标值自然也会更加稳定，从而进一步保证Critic网络的学习过程更加平稳。",{"type":18,"tag":30,"props":1681,"children":1682},{},[1683],{"type":23,"value":1684},"上述作用是其中之一，引入目标网络还有另一个作用就是:避免自举(Bootstrapping)问题。自举是指用后继的估算值，来更新现在状态的估算值，它会使网络出现过估计的问题。如果过估计是均匀的，对于最终的决策不会造成影响；但是如果不均匀，对于最终的决策会产生很大影响。",{"type":18,"tag":30,"props":1686,"children":1687},{},[1688],{"type":18,"tag":36,"props":1689,"children":1690},{},[1691],{"type":23,"value":1692},"3)噪声探索",{"type":18,"tag":30,"props":1694,"children":1695},{},[1696],{"type":23,"value":1697},"探索对于智能体来说是至关重要的，确定性策略输出的动作为确定性动作，缺乏对环境的探索。因此在训练阶段，要人为地给Actor网络输出的动作加入噪声，从而让智能体具备一定的探索能力。",{"type":18,"tag":295,"props":1699,"children":1701},{"code":1700},"注意:噪声只会加在训练阶段Actor网络输出的动作上，推理阶段不要加上噪声，以及在更新网络参数时也不要加上噪声，因为我们只需要在训练阶段让智能体具备探索能力，推理时是不需要该能力的。\n",[1702],{"type":18,"tag":300,"props":1703,"children":1704},{"__ignoreMap":7},[1705],{"type":23,"value":1700},{"title":7,"searchDepth":1707,"depth":1707,"links":1708},4,[1709,1711,1712,1719],{"id":55,"depth":1710,"text":55},2,{"id":60,"depth":1710,"text":60},{"id":65,"depth":1710,"text":65,"children":1713},[1714,1716,1717,1718],{"id":75,"depth":1715,"text":75},3,{"id":80,"depth":1715,"text":80},{"id":306,"depth":1715,"text":306},{"id":85,"depth":1715,"text":85},{"id":90,"depth":1710,"text":90,"children":1720},[1721,1722,1723,1724,1725],{"id":352,"depth":1715,"text":98},{"id":691,"depth":1715,"text":103},{"id":961,"depth":1715,"text":108},{"id":1252,"depth":1715,"text":113},{"id":1434,"depth":1715,"text":118},"markdown","content:technology-blogs:zh:3198.md","content","technology-blogs/zh/3198.md","technology-blogs/zh/3198","md",1776506127061]