[{"data":1,"prerenderedAt":340},["ShallowReactive",2],{"content-query-1oUFe1yHXq":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":334,"_id":335,"_source":336,"_file":337,"_stem":338,"_extension":339},"/news/zh/2522","zh",false,"","【昇思MindSpore技术公开课】第五节分布式并行课程回顾来啦！","继Transformer到BERT、GPT2的讲解，相信大家对大模型前期的技术和模型细节有了更深入的了解。上周六（5月20日）我们为大家带来LLM（Large Language Model）预训练的基础——分布式并行技术的讲解。","2023-05-24","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2023/05/31/d11cc47b623541cfbfccd6badc27ea81.png","news",{"type":14,"children":15,"toc":331},"root",[16,24,29,34,43,51,56,92,97,102,107,112,117,122,130,135,140,145,153,158,163,168,177,190,197,210,217,235,242,255,262,275,283,291,296,303,311,319,324],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思mindspore技术公开课第五节分布式并行课程回顾来啦",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":9},{"type":17,"tag":25,"props":30,"children":31},{},[32],{"type":23,"value":33},"以昇思MindSpore分布式并行特性为依托，详细解释了数据并行、模型并行、Pipeline并行、内存优化等技术，并邀请了昇思MindSpore分布式并行专家苏腾博士进行了《昇腾+昇思超大规模AI实践》分享，向大家展示了昇思MindSpore在大规模预训练模型上的前沿实践。接下来我们对课程进行简单回顾，迎接下一节公开课的进一步深入。",{"type":17,"tag":25,"props":35,"children":36},{},[37],{"type":17,"tag":38,"props":39,"children":40},"strong",{},[41],{"type":23,"value":42},"一、",{"type":17,"tag":25,"props":44,"children":45},{},[46],{"type":17,"tag":38,"props":47,"children":48},{},[49],{"type":23,"value":50},"课程回顾",{"type":17,"tag":25,"props":52,"children":53},{},[54],{"type":23,"value":55},"昇腾+昇思超大规模AI实践：",{"type":17,"tag":57,"props":58,"children":59},"ul",{},[60,87],{"type":17,"tag":61,"props":62,"children":63},"li",{},[64,66,70,72,75,77,80,82,85],{"type":23,"value":65},"大模型的发展以及对系统的挑战",{"type":17,"tag":67,"props":68,"children":69},"br",{},[],{"type":23,"value":71},"1、内存墙",{"type":17,"tag":67,"props":73,"children":74},{},[],{"type":23,"value":76},"2、性能墙",{"type
":17,"tag":67,"props":78,"children":79},{},[],{"type":23,"value":81},"3、效率墙",{"type":17,"tag":67,"props":83,"children":84},{},[],{"type":23,"value":86},"4、调优墙",{"type":17,"tag":61,"props":88,"children":89},{},[90],{"type":23,"value":91},"昇思MindSpore大模型关键技术：",{"type":17,"tag":25,"props":93,"children":94},{},[95],{"type":23,"value":96},"1、原生大模型支持",{"type":17,"tag":25,"props":98,"children":99},{},[100],{"type":23,"value":101},"2、大模型使能套件",{"type":17,"tag":25,"props":103,"children":104},{},[105],{"type":23,"value":106},"3、大模型分布式推理",{"type":17,"tag":25,"props":108,"children":109},{},[110],{"type":23,"value":111},"4、大模型集群调优",{"type":17,"tag":25,"props":113,"children":114},{},[115],{"type":23,"value":116},"5、大集群快速故障恢复",{"type":17,"tag":25,"props":118,"children":119},{},[120],{"type":23,"value":121},"6、昇腾亲和的Transformer融合算子",{"type":17,"tag":57,"props":123,"children":124},{},[125],{"type":17,"tag":61,"props":126,"children":127},{},[128],{"type":23,"value":129},"昇思MindSpore大模型训练案例：",{"type":17,"tag":25,"props":131,"children":132},{},[133],{"type":23,"value":134},"1、鹏程盘古千亿稠密大模型训练实例",{"type":17,"tag":25,"props":136,"children":137},{},[138],{"type":23,"value":139},"2、盘古sigma稀疏多任务大模型异构训练",{"type":17,"tag":25,"props":141,"children":142},{},[143],{"type":23,"value":144},"昇思MindSpore分布式并行特性：",{"type":17,"tag":57,"props":146,"children":147},{},[148],{"type":17,"tag":61,"props":149,"children":150},{},[151],{"type":23,"value":152},"数据并行",{"type":17,"tag":25,"props":154,"children":155},{},[156],{"type":23,"value":157},"每一张卡上放置相同的模型参数、梯度、优化器状态",{"type":17,"tag":25,"props":159,"children":160},{},[161],{"type":23,"value":162},"不同的卡送入不同的数据训练",{"type":17,"tag":25,"props":164,"children":165},{},[166],{"type":23,"value":167},"反向传播获得梯度后，进行AllReduce",{"type":17,"tag":25,"props":169,"children":170},{},[171],{"type":17,"tag":172,"props":173,"children":176},"img",{"alt":174,"src":175},"image.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b38
7b3660e356ff5.20230531061159.86034466911847719175067225685358:50540530071300:2400:CED982A87A879BEF6252CB6F9CB81D0983151EDF5813AE29BABC14C6196F9D1F.png",[],{"type":17,"tag":57,"props":178,"children":179},{},[180],{"type":17,"tag":61,"props":181,"children":182},{},[183,185,188],{"type":23,"value":184},"模型并行",{"type":17,"tag":67,"props":186,"children":187},{},[],{"type":23,"value":189},"模型并行是算子层面的并行，它利用某些算子的特性将算子拆分到多个设备上进行计算。",{"type":17,"tag":25,"props":191,"children":192},{},[193],{"type":17,"tag":172,"props":194,"children":196},{"alt":174,"src":195},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230531061222.54069693038607068379401225264748:50540530071300:2400:C67C1C32996542722B6C971ADE294ADC16D160EC22ECCDA6357AFE08DD94B2A5.png",[],{"type":17,"tag":57,"props":198,"children":199},{},[200],{"type":17,"tag":61,"props":201,"children":202},{},[203,205,208],{"type":23,"value":204},"Pipeline并行",{"type":17,"tag":67,"props":206,"children":207},{},[],{"type":23,"value":209},"流水线（Pipeline）并行是将神经网络中的算子切分成多个阶段（Stage），再把阶段映射到不同的设备上，使得不同设备去计算神经网络的不同部分。",{"type":17,"tag":25,"props":211,"children":212},{},[213],{"type":17,"tag":172,"props":214,"children":216},{"alt":174,"src":215},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230531061242.36751896087455227568447437627522:50540530071300:2400:93342F724CFE72116746EE3ECCF503802B580B418689EDD3517DA1D750434E53.png",[],{"type":17,"tag":57,"props":218,"children":219},{},[220],{"type":17,"tag":61,"props":221,"children":222},{},[223,225,228,230,233],{"type":23,"value":224},"内存优化",{"type":17,"tag":67,"props":226,"children":227},{},[],{"type":23,"value":229},"1、重计算",{"type":17,"tag":67,"props":231,"children":232},{},[],{"type":23,"value":234},"时间换空间：重计算技术可以不保存正向计算结果，让该内存可以被复用，然后在计算反向部分时，重新计算出正向结果。",{"type":17,"tag":25,"props":236,"children":237},{},[238],{"type":17,"tag":172,"props":239,"children":24
1},{"alt":174,"src":240},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230531061302.77376985899321143391397099848977:50540530071300:2400:31A3381EAEA5ECE389D9769BABE222509CB41484CC21F769033F1034914E5308.png",[],{"type":17,"tag":57,"props":243,"children":244},{},[245],{"type":17,"tag":61,"props":246,"children":247},{},[248,250,253],{"type":23,"value":249},"2、优化器并行——ZeRO",{"type":17,"tag":67,"props":251,"children":252},{},[],{"type":23,"value":254},"将参数和梯度分组放到不同卡上更新，再通过通信广播操作在设备间共享更新后的权值。",{"type":17,"tag":25,"props":256,"children":257},{},[258],{"type":17,"tag":172,"props":259,"children":261},{"alt":174,"src":260},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230531061322.08546134834143939752794908273100:50540530071300:2400:34722838BE5A990D9DF1FF9FAE2B3990D00F8A70F6479305AA983B7603824C5D.png",[],{"type":17,"tag":57,"props":263,"children":264},{},[265],{"type":17,"tag":61,"props":266,"children":267},{},[268,270,273],{"type":23,"value":269},"昇思MindSpore自动并行",{"type":17,"tag":67,"props":271,"children":272},{},[],{"type":23,"value":274},"用户的神经网络在单卡上无法计算，但是不知道如何配置算子策略。用户启动这种模式，MindSpore会自动针对每个算子进行配置策略，适合想要并行训练但是不知道如何配置策略的用户。",{"type":17,"tag":25,"props":276,"children":277},{},[278],{"type":17,"tag":38,"props":279,"children":280},{},[281],{"type":23,"value":282},"二、",{"type":17,"tag":25,"props":284,"children":285},{},[286],{"type":17,"tag":38,"props":287,"children":288},{},[289],{"type":23,"value":290},"下节课预告",{"type":17,"tag":25,"props":292,"children":293},{},[294],{"type":23,"value":295},"本周六我们邀请到清华大学的夏箫同学进行代码大模型的技术分析。代码大模型被认为是ChatGPT的重要构成部分，通过代码预训练能够在代码生成、补全等能力外，提高模型的逻辑推理能力。清华大学知识工程实验室研发的CodeGeeX大模型是国内唯一基于全国产软硬件平台的代码大模型，基于昇腾+MindSpore实现了不逊于国外代码大模型的能力。本周直播的课程内容详见下方海报：",{"type":17,"tag":25,"props":297,"children":298},{},[299],{"type":17,"tag":172,"props":300,"children":302},{"alt":174,"src":301},"https://fileserver.deve
loper.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230531061410.27030088206016965593858790619964:50540530071300:2400:E167BCD317494BA8DB29527AE0A05C4D42B73429FEA84D813A1299FAEB05C212.png",[],{"type":17,"tag":25,"props":304,"children":305},{},[306],{"type":17,"tag":38,"props":307,"children":308},{},[309],{"type":23,"value":310},"三、",{"type":17,"tag":25,"props":312,"children":313},{},[314],{"type":17,"tag":38,"props":315,"children":316},{},[317],{"type":23,"value":318},"课程建议和课后答疑",{"type":17,"tag":25,"props":320,"children":321},{},[322],{"type":23,"value":323},"如果大家在运行代码中遇到什么问题，对课程有什么建议，或者希望我们增添什么内容，都可以在昇思MindSpore技术公开课大模型专题的代码仓中提issue进行反馈，或者加入课程讨论群，会有课程讲师在群中进行答疑。",{"type":17,"tag":25,"props":325,"children":326},{},[327],{"type":17,"tag":172,"props":328,"children":330},{"alt":7,"src":329},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2023/05/31/fdd80508ac774d76a4b039cf6a008b7a.png",[],{"title":7,"searchDepth":332,"depth":332,"links":333},4,[],"markdown","content:news:zh:2522.md","content","news/zh/2522.md","news/zh/2522","md",1776506066209]