[{"data":1,"prerenderedAt":162},["ShallowReactive",2],{"content-query-zh0uToO6TG":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":156,"_id":157,"_source":158,"_file":159,"_stem":160,"_extension":161},"/technology-blogs/zh/1862","zh",false,"","【MindSpore易点通】如何实现梯度不回传以及梯度回传后不更新权重功能","在使用深度学习网络结构执行开发任务时，难免会遇到一些代码上看上去很正常，而且代价函数也在不断减小，但是因为不知道的bug存在，使得我们得到的神经网络并不是最优解。","2022-09-22","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/09/30/1f1cec0290754765981e26762f2effad.png","technology-blogs","开发者分享",{"type":15,"children":16,"toc":144},"root",[17,25,31,37,43,50,60,65,70,76,81,89,93,98,104,109,114,122,129],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"mindspore易点通如何实现梯度不回传以及梯度回传后不更新权重功能",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":29},"h3",{"id":28},"背景信息",[30],{"type":24,"value":28},{"type":18,"tag":32,"props":33,"children":34},"p",{},[35],{"type":24,"value":36},"用户在使用深度学习网络结构执行开发任务时，难免会遇到一些代码上看上去很正常，而且代价函数也在不断减小，但是因为不知道的bug存在，使得我们得到的神经网络并不是最优解。为详细排查所遇问题，故在此提供2种不同接口用以实现梯度不回传以及梯度回传后不更新权重功能。",{"type":18,"tag":26,"props":38,"children":40},{"id":39},"一使用stop_gradient接口实现",[41],{"type":24,"value":42},"一、使用stop_gradient接口实现",{"type":18,"tag":44,"props":45,"children":47},"h4",{"id":46},"_1示例代码",[48],{"type":24,"value":49},"1、示例代码",{"type":18,"tag":51,"props":52,"children":54},"pre",{"code":53},"import numpy as np\nimport mindspore.ops as ops\nfrom mindspore import Tensor, context\nfrom mindspore.ops import operations as P\nimport mindspore.nn as nn\nimport mindspore\nfrom mindspore.ops import stop_gradient\n\n#设置训练环境\n\ncontext.set_context(mode=context.GRAPH_MODE, device_target=\"Ascend\")\n\nclass PrintDemo(nn.Cell):\n\n        def __init__(self):\n\n            super(PrintDemo, self).__init__()\n\n            self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=4, stride=1 ,has_bias=False, 
weight_init='normal', pad_mode='valid')\n\n            self.conv2 = nn.Conv2d(in_channels=6, out_channels=2, kernel_size=2, pad_mode=\"valid\")\n\n            self.conv3 = nn.Conv2d(in_channels=2, out_channels=6, kernel_size=2, pad_mode=\"valid\")\n\n            self.print = P.Print()\n\n        \n\n        #打印出特定层权重输出结果\n\n        def construct(self, input_data):\n\n            x = self.conv1(input_data)\n\n            x = stop_gradient(x)\n\n            self.print(\"self.conv1.weight:\", self.conv1.weight)\n\n            x = self.conv2(x)\n\n            x = self.conv3(x)\n\n            return x\n\ndef test():\n\n    input_data = Tensor(np.ones([1, 1, 32, 32]), mindspore.float32)\n\n    net = PrintDemo()\n\n    net(input_data)\n\n    return net(input_data)\n\n\n\ntest()\n",[55],{"type":18,"tag":56,"props":57,"children":58},"code",{"__ignoreMap":7},[59],{"type":24,"value":53},{"type":18,"tag":32,"props":61,"children":62},{},[63],{"type":24,"value":64},"参数注解：",{"type":18,"tag":32,"props":66,"children":67},{},[68],{"type":24,"value":69},"stop_gradient():在反向传播时禁止网络梯度更新。",{"type":18,"tag":26,"props":71,"children":73},{"id":72},"二使用requires_grad接口实现",[74],{"type":24,"value":75},"二、使用requires_grad接口实现",{"type":18,"tag":44,"props":77,"children":79},{"id":78},"_1示例代码-1",[80],{"type":24,"value":49},{"type":18,"tag":51,"props":82,"children":84},{"code":83},"...\n\nclass PrintDemo(nn.Cell):\n\n\n        #打印出特定层权重输出结果\n\n        def construct(self, input_data):\n\n\n            #x = stop_gradient(x)\n\n\ndef test():\n\n\n    for param in net.trainable_params():\n\n        if 'conv1' in param.name:\n\n            param.requires_grad = False\n\n        else:\n\n            param.requires_grad = 
True\n\n\n\n\ntest()\n",[85],{"type":18,"tag":56,"props":86,"children":87},{"__ignoreMap":7},[88],{"type":24,"value":83},{"type":18,"tag":32,"props":90,"children":91},{},[92],{"type":24,"value":64},{"type":18,"tag":32,"props":94,"children":95},{},[96],{"type":24,"value":97},"requires_grad:bool类型，当值为True时表明该参数需要更新，反之则不需更新。",{"type":18,"tag":26,"props":99,"children":101},{"id":100},"三实验截图",[102],{"type":24,"value":103},"三、实验截图",{"type":18,"tag":32,"props":105,"children":106},{},[107],{"type":24,"value":108},"梯度更新的中心思想是沿着loss函数梯度的方向更新权重以让loss函数的值最小化或accuracy最大化，在示例代码中使用requires_grad与stop_gradient方法实现禁止conv1层梯度更新。实验中conv1层中开始与结束时的权重变化如下图所示：",{"type":18,"tag":32,"props":110,"children":111},{},[112],{"type":24,"value":113},"conv1中权重值的变化：",{"type":18,"tag":32,"props":115,"children":116},{},[117],{"type":18,"tag":118,"props":119,"children":121},"img",{"alt":7,"src":120},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/09/30/990d6d508ed64c96a9d2ce44e9653408.png",[],{"type":18,"tag":32,"props":123,"children":124},{},[125],{"type":18,"tag":118,"props":126,"children":128},{"alt":7,"src":127},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/09/30/01a63b5d50944381b4280c8cc15fc9f6.png",[],{"type":18,"tag":32,"props":130,"children":131},{},[132,134,142],{"type":24,"value":133},"详细代码请前往MindSpore论坛进行下载：",{"type":18,"tag":135,"props":136,"children":140},"a",{"href":137,"rel":138},"https://bbs.huaweicloud.com/forum/thread-0263990476229480016-1-1.html",[139],"nofollow",[141],{"type":24,"value":137},{"type":24,"value":143},"。",{"title":7,"searchDepth":145,"depth":145,"links":146},4,[147,149,152,155],{"id":28,"depth":148,"text":28},3,{"id":39,"depth":148,"text":42,"children":150},[151],{"id":46,"depth":145,"text":49},{"id":72,"depth":148,"text":75,"children":153},[154],{"id":78,"depth":145,"text":49},{"id":100,"depth":148,"text":103},"markdown","content:technology-blogs:zh:1862.md","content","technology-blogs/zh/1862.md","technology-blogs/zh/1862
","md",1776506116318]