[{"data":1,"prerenderedAt":220},["ShallowReactive",2],{"content-query-kYNz80AdUX":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":214,"_id":215,"_source":216,"_file":217,"_stem":218,"_extension":219},"/news/zh/2486","zh",false,"","【昇思MindSpore技术公开课】第四节课GPT的课程回顾来啦！","上周六（5月13日）我们继续为大家讲解了GPT2的核心创新点以及模型实现细节，并且将Transformer Decoder结构的核心部分Masked Self-attention进行了示意图和单步代码一一对应的详细解读。大家是否对GPT模型有了更加深入的理解呢？接下来我们对课程进行简单回顾，迎接下一节公开课的进一步深入。","2023-05-17","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2023/05/22/eb0bb45f28fc4e54b28c51f024ff815b.png","news",{"type":14,"children":15,"toc":211},"root",[16,24,29,38,43,53,62,67,74,79,86,114,119,127,132,137,150,158,170,183,191,199,204],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思mindspore技术公开课第四节课gpt的课程回顾来啦",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":9},{"type":17,"tag":25,"props":30,"children":31},{},[32],{"type":17,"tag":33,"props":34,"children":35},"strong",{},[36],{"type":23,"value":37},"一、课程回顾",{"type":17,"tag":25,"props":39,"children":40},{},[41],{"type":23,"value":42},"GPT2论文解读：",{"type":17,"tag":44,"props":45,"children":46},"ul",{},[47],{"type":17,"tag":48,"props":49,"children":50},"li",{},[51],{"type":23,"value":52},"Language Model任务的再解读：Next Token Predict和自回归预测",{"type":17,"tag":25,"props":54,"children":55},{},[56],{"type":17,"tag":57,"props":58,"children":61},"img",{"alt":59,"src":60},"image.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522054427.79086128608557161903254783485738:20230522063429:2400:45806A40AAD563AC5B8F85D5B54937C9CA9F48C258470A6262C8CBED80C6BEB6.png",[],{"type":17,"tag":25,"props":63,"children":64},{},[65],{"type":23,"value":66},"Task Conditioning：GPT2实现Zero Shot的核心方法，针对不同任务设计Task Condition（即Task 
instruction）进行预训练，面向新的生成任务，可以不进行有标注的微调，而使用instruction来引导模型生成结果。",{"type":17,"tag":25,"props":68,"children":69},{},[70],{"type":17,"tag":57,"props":71,"children":73},{"alt":59,"src":72},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522054449.29278843835286822767567181548691:20230522063429:2400:18A9D0472AAE812C99857C5E715B26F257480E69CE66CEF1144C7F4B27940748.png",[],{"type":17,"tag":25,"props":75,"children":76},{},[77],{"type":23,"value":78},"Masked Self-Attention的单步详解：",{"type":17,"tag":25,"props":80,"children":81},{},[82],{"type":17,"tag":57,"props":83,"children":85},{"alt":59,"src":84},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230522054514.35727726396214489908775355036473:20230522063429:2400:DC49198FE4A2DF7668B2B09ABCEDCCFB0A0394DC7F7B7B095FA95B481F2A986A.png",[],{"type":17,"tag":44,"props":87,"children":88},{},[89,94,99,104,109],{"type":17,"tag":48,"props":90,"children":91},{},[92],{"type":23,"value":93},"创建query、key、value",{"type":17,"tag":48,"props":95,"children":96},{},[97],{"type":23,"value":98},"切分Attention heads",{"type":17,"tag":48,"props":100,"children":101},{},[102],{"type":23,"value":103},"计算Attention Score，需要加入Causal Mask来防止前面单词看到未来。",{"type":17,"tag":48,"props":105,"children":106},{},[107],{"type":23,"value":108},"合并Attention 
heads",{"type":17,"tag":48,"props":110,"children":111},{},[112],{"type":23,"value":113},"进行一层Projection",{"type":17,"tag":25,"props":115,"children":116},{},[117],{"type":23,"value":118},"使用GPT2训练一个文本摘要模型（自回归预训练）：",{"type":17,"tag":44,"props":120,"children":121},{},[122],{"type":17,"tag":48,"props":123,"children":124},{},[125],{"type":23,"value":126},"自回归任务的处理：",{"type":17,"tag":25,"props":128,"children":129},{},[130],{"type":23,"value":131},"1、静态文本长度，处理为1024，需要进行pad和truncate",{"type":17,"tag":25,"props":133,"children":134},{},[135],{"type":23,"value":136},"2、输入和输出分别为text[0:-1]和text[1:]",{"type":17,"tag":44,"props":138,"children":139},{},[140,145],{"type":17,"tag":48,"props":141,"children":142},{},[143],{"type":23,"value":144},"使用自动混合精度，配合Loss Scaler解决溢出问题",{"type":17,"tag":48,"props":146,"children":147},{},[148],{"type":23,"value":149},"自回归的文本生成预测",{"type":17,"tag":25,"props":151,"children":152},{},[153],{"type":17,"tag":33,"props":154,"children":155},{},[156],{"type":23,"value":157},"二、下节课预告",{"type":17,"tag":25,"props":159,"children":160},{},[161,163,168],{"type":23,"value":162},"本周六（5月20日）14：00-15：30，将由我们",{"type":17,"tag":33,"props":164,"children":165},{},[166],{"type":23,"value":167},"昇思MindSpore分布式训练技术专家，苏腾",{"type":23,"value":169},"为大家带来《昇腾+昇思超大规模AI实践》课程的讲解，这里悄悄剧透一下课程内容：",{"type":17,"tag":44,"props":171,"children":172},{},[173,178],{"type":17,"tag":48,"props":174,"children":175},{},[176],{"type":23,"value":177},"昇思MindSpore的分布式特性讲解",{"type":17,"tag":48,"props":179,"children":180},{},[181],{"type":23,"value":182},"昇思MindSpore分布式并行代码示例",{"type":17,"tag":25,"props":184,"children":185},{},[186],{"type":17,"tag":33,"props":187,"children":188},{},[189],{"type":23,"value":190},"三、",{"type":17,"tag":25,"props":192,"children":193},{},[194],{"type":17,"tag":33,"props":195,"children":196},{},[197],{"type":23,"value":198},"课程建议和课后答疑",{"type":17,"tag":25,"props":200,"children":201},{},[202],{"type":23,"value":203},"如果大家在运行代码中遇到什么问题，对课程有什么建议，或者希望我们增添什么内容，都可以在昇思MindSpor
e技术公开课大模型专题的代码仓中提issue进行反馈，或者加入课程讨论群，会有课程讲师在群中进行答疑。",{"type":17,"tag":25,"props":205,"children":206},{},[207],{"type":17,"tag":57,"props":208,"children":210},{"alt":7,"src":209},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2023/05/22/4984d906e42941e98d749cf70c588f50.png",[],{"title":7,"searchDepth":212,"depth":212,"links":213},4,[],"markdown","content:news:zh:2486.md","content","news/zh/2486.md","news/zh/2486","md",1776506065789]