[{"data":1,"prerenderedAt":216},["ShallowReactive",2],{"content-query-AVx5dOmhZL":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":210,"_id":211,"_source":212,"_file":213,"_stem":214,"_extension":215},"/news/zh/2576","zh",false,"","【昇思MindSpore技术公开课】第九节Instruction Tuning课程回顾来啦！","在上周六（6月17日），我们进行了昇思MindSpore技术公开课-大模型专题第九节课程：Instruction Tuning的讲解。课程中，我们介绍了Instruction Tuning的核心思想。","2023-06-21","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2023/06/29/721553365ce047a8a40c303b6b8144e4.png","news",{"type":14,"children":15,"toc":207},"root",[16,24,30,39,47,52,72,81,89,94,99,104,109,116,121,126,133,156,164,172,177,200],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思mindspore技术公开课第九节instruction-tuning课程回顾来啦",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"在上周六（6月17日），我们进行了昇思MindSpore技术公开课-大模型专题第九节课程：Instruction Tuning的讲解。课程中，我们介绍了Instruction Tuning的核心思想，并由此引出ChatGPT的前身：InstructGPT模型，与思维链（Chain-of-thoughts）思想。接下来我们对课程进行简单回顾，迎接下一节公开课的进一步深入。",{"type":17,"tag":25,"props":31,"children":32},{},[33],{"type":17,"tag":34,"props":35,"children":36},"strong",{},[37],{"type":23,"value":38},"一、",{"type":17,"tag":25,"props":40,"children":41},{},[42],{"type":17,"tag":34,"props":43,"children":44},{},[45],{"type":23,"value":46},"课程回顾",{"type":17,"tag":25,"props":48,"children":49},{},[50],{"type":23,"value":51},"Instruction Tuning：",{"type":17,"tag":53,"props":54,"children":55},"ul",{},[56,62,67],{"type":17,"tag":57,"props":58,"children":59},"li",{},[60],{"type":23,"value":61},"Instruction Tuning的核心思想：让模型能够理解任务描述（指令）。",{"type":17,"tag":57,"props":63,"children":64},{},[65],{"type":23,"value":66},"FLAN模型：finetune LM to better understand task description via other tasks。通过Instruction Tuning来提高大模型的zero-shot能力。",{"type":17,"tag":57,"props":68,"children":69},{},[70],{"type":23,"value":71},"Instruction Tuning和prompting的区别对比",{"type":17,"tag":25,"props":73,"children":74},{},[75],{"type":17,"tag":76,"props":77,"children":80},"img",{"alt":78,"src":79},"image.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230628072503.28968473297027520693454858417366:50540628033456:2400:F4F6A9DB85E443373EF853E960D64035994C4C858A71B805DB663621B6A8D966.png",[],{"type":17,"tag":53,"props":82,"children":83},{},[84],{"type":17,"tag":57,"props":85,"children":86},{},[87],{"type":23,"value":88},"Instruction Tuning的局限性",{"type":17,"tag":25,"props":90,"children":91},{},[92],{"type":23,"value":93},"1、问题1：无法支持开放域创新性的任务，如写小说。",{"type":17,"tag":25,"props":95,"children":96},{},[97],{"type":23,"value":98},"2、问题2：语言模型对token-level的错误是同等惩罚的，但是实际上不同的token错误带来的影响差异会很大。",{"type":17,"tag":25,"props":100,"children":101},{},[102],{"type":23,"value":103},"3、问题3：即使使用Instruction Tuning，还是没法将LM的训练目标和人类的需求对齐。",{"type":17,"tag":25,"props":105,"children":106},{},[107],{"type":23,"value":108},"InstructGPT：",{"type":17,"tag":25,"props":110,"children":111},{},[112],{"type":17,"tag":76,"props":113,"children":115},{"alt":78,"src":114},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230628072524.04896723467453679701331935759466:50540628033456:2400:121A7B0791897211DCB5223A8D2B6909813854B46566999E72B34D4B484E95F2.png",[],{"type":17,"tag":25,"props":117,"children":118},{},[119],{"type":23,"value":120},"从GPT到ChatGPT的发展，InstructGPT是ChatGPT的前身，Instruction Finetuning是ChatGPT必备的技术之一。",{"type":17,"tag":25,"props":122,"children":123},{},[124],{"type":23,"value":125},"Chain-of-thoughts：",{"type":17,"tag":25,"props":127,"children":128},{},[129],{"type":17,"tag":76,"props":130,"children":132},{"alt":78,"src":131},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230628072540.14734181381017030800449107672439:50540628033456:2400:7FBE4F8996A56207E78D7331BDA94AE5CCD691293BB2776271E4A22C59782182.png",[],{"type":17,"tag":53,"props":134,"children":135},{},[136,141,146,151],{"type":17,"tag":57,"props":137,"children":138},{},[139],{"type":23,"value":140},"思维链随着模型规模的增大，开始出现，被称为大模型的涌现能力。",{"type":17,"tag":57,"props":142,"children":143},{},[144],{"type":23,"value":145},"思维链Prompting可以通过在prompt中提供示例，让模型“举一反三”。",{"type":17,"tag":57,"props":147,"children":148},{},[149],{"type":23,"value":150},"使用Let’s think step by step.这种Zero-shot的思维链提示，也能让模型性能大幅提升。",{"type":17,"tag":57,"props":152,"children":153},{},[154],{"type":23,"value":155},"新时代的职业，Prompt工程师。",{"type":17,"tag":25,"props":157,"children":158},{},[159],{"type":17,"tag":34,"props":160,"children":161},{},[162],{"type":23,"value":163},"二、",{"type":17,"tag":25,"props":165,"children":166},{},[167],{"type":17,"tag":34,"props":168,"children":169},{},[170],{"type":23,"value":171},"下节课预告",{"type":17,"tag":25,"props":173,"children":174},{},[175],{"type":23,"value":176},"本周六（6月24日）课程，我们将为大家带来昇思MindSpore技术公开课-大模型专题第十节课程：RLHF的讲解，探究ChatGPT的“语言艺术”。本周直播的课程内容有：",{"type":17,"tag":53,"props":178,"children":179},{},[180,185,190,195],{"type":17,"tag":57,"props":181,"children":182},{},[183],{"type":23,"value":184},"强化学习与PPO算法",{"type":17,"tag":57,"props":186,"children":187},{},[188],{"type":23,"value":189},"InstructGPT/ChatGPT中的RLHF",{"type":17,"tag":57,"props":191,"children":192},{},[193],{"type":23,"value":194},"动手训练一个Reward模型",{"type":17,"tag":57,"props":196,"children":197},{},[198],{"type":23,"value":199},"使用GPT2实现ChatGPT全流程（基于人工反馈的评论生成模型）",{"type":17,"tag":25,"props":201,"children":202},{},[203],{"type":17,"tag":76,"props":204,"children":206},{"alt":78,"src":205},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230628072607.85709003843887023051151207619756:50540628033456:2400:8F07AC2905EFA3C45F6F7A6042F1209449E08B13539F5F3A388D80DBB58634B9.png",[],{"title":7,"searchDepth":208,"depth":208,"links":209},4,[],"markdown","content:news:zh:2576.md","content","news/zh/2576.md","news/zh/2576","md",1776506067645]