[{"data":1,"prerenderedAt":185},["ShallowReactive",2],{"content-query-xdQ11dHEHz":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":179,"_id":180,"_source":181,"_file":182,"_stem":183,"_extension":184},"/technology-blogs/zh/1055","zh",false,"","大模型落地实践思考（1）","大模型在应用落地中面临的挑战","2022-03-03","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2022/03/03/b5dd689bf0b74667a317c1ba1917cb6a.png","technology-blogs","大V博文",{"type":15,"children":16,"toc":176},"root",[17,25,31,44,49,61,69,80,88,102,113,126,137,154,165],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"大模型落地实践思考1",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":24,"value":30},"作者：于璠",{"type":18,"tag":26,"props":32,"children":33},{},[34,36],{"type":24,"value":35},"链接：",{"type":18,"tag":37,"props":38,"children":42},"a",{"href":39,"rel":40},"https://zhuanlan.zhihu.com/p/463191289",[41],"nofollow",[43],{"type":24,"value":39},{"type":18,"tag":26,"props":45,"children":46},{},[47],{"type":24,"value":48},"从2020年GPT-3发布以来，国内外关于大模型的研究层出不穷，各个大厂纷纷开始进行大模型的“军备竞赛”，致力于使模型更大、训练速度更快、覆盖领域更多和模型效果更好。但众所周知，大模型的研究投入是很大的，GPT-3训练一次的费用是460万美元，训练时间为355个GPU年，总成本据悉达到1200万美元，投入如此大的成本，大厂们肯定是希望能从大模型技术中获得回报的，但是，从目前大模型技术的发展来看，大模型的应用落地还面临着很多挑战。个人认为大模型在应用落地中面临的挑战可以归类为三个方面：场景、大模型调用方法和大模型迭代，下面以大规模语言模型为例逐一进行讨论。",{"type":18,"tag":26,"props":50,"children":51},{},[52],{"type":18,"tag":53,"props":54,"children":55},"strong",{},[56],{"type":18,"tag":53,"props":57,"children":58},{},[59],{"type":24,"value":60},"一. 
场景",{"type":18,"tag":26,"props":62,"children":63},{},[64],{"type":18,"tag":53,"props":65,"children":66},{},[67],{"type":24,"value":68},"虽然大模型相比之前的模型展现出了令人惊艳的能力，但个人感觉，还没有达到“量变引起质变”的程度，大模型解决复杂问题的能力较弱或者相当不稳定，解决问题更多的是基于“记忆”而不是“思维”，这导致很难将大模型应用在容错率低的场景，即使应用了也需要人工干预，也就是说，目前的大模型很难开辟新的赛道和现金流业务。OpenAI在2021年联合GitHub发布的Copilot，尝试在IDE领域为现有编程方式带来质变，但目前来看，还没有完全取得成功，如图1所示，Copilot解决复杂问题的能力也有限。",{"type":18,"tag":26,"props":70,"children":71},{},[72],{"type":18,"tag":53,"props":73,"children":74},{},[75],{"type":18,"tag":76,"props":77,"children":79},"img",{"alt":7,"src":78},"https://pic3.zhimg.com/80/v2-170b1a61c9f98385af2908e87760677a_720w.jpg",[],{"type":18,"tag":26,"props":81,"children":82},{},[83],{"type":18,"tag":53,"props":84,"children":85},{},[86],{"type":24,"value":87},"图1 Copilot解决复杂问题的能力有限",{"type":18,"tag":26,"props":89,"children":90},{},[91],{"type":18,"tag":53,"props":92,"children":93},{},[94],{"type":18,"tag":53,"props":95,"children":96},{},[97],{"type":18,"tag":53,"props":98,"children":99},{},[100],{"type":24,"value":101},"二.大模型调用方法",{"type":18,"tag":26,"props":103,"children":104},{},[105],{"type":18,"tag":53,"props":106,"children":107},{},[108],{"type":18,"tag":53,"props":109,"children":110},{},[111],{"type":24,"value":112},"大模型由于太过庞大，导致微调成本很高，如果大模型厂商要给每个用户都提供微调服务的话，在用户数量很大的情况下，微调成本和模型维护成本会变得相当高，最理想的情况是，大模型厂商只维护一套参数，仅提供推理服务，这样，通过加速大模型推理过程，可以最大限度地降低成本。但是，如果仅提供推理服务，那就只能进行大模型的zero-shot，但目前大模型的zero-shot效果一般来说是要比小模型的微调效果差的，这就使得大模型的竞争力严重不足。关于这个问题，复旦邱锡鹏老师的团队提出了“黑箱优化”的方法，感兴趣的朋友可以了解一下。",{"type":18,"tag":26,"props":114,"children":115},{},[116],{"type":18,"tag":53,"props":117,"children":118},{},[119],{"type":18,"tag":53,"props":120,"children":121},{},[122],{"type":18,"tag":76,"props":123,"children":125},{"alt":7,"src":124},"https://pic1.zhimg.com/80/v2-dc05de77455524226ded2579b5890364_720w.jpg",[],{"type":18,"tag":26,"props":127,"children":128},{},[129],{"type":18,"tag":53,"props":130,"children":131},{},[132],{"type":18,"tag":53,"props":133,"children":134},{},[135],{"type":24,"valu
e":136},"图2 黑箱优化",{"type":18,"tag":26,"props":138,"children":139},{},[140],{"type":18,"tag":53,"props":141,"children":142},{},[143],{"type":18,"tag":53,"props":144,"children":145},{},[146],{"type":18,"tag":53,"props":147,"children":148},{},[149],{"type":18,"tag":53,"props":150,"children":151},{},[152],{"type":24,"value":153},"三.大模型迭代",{"type":18,"tag":26,"props":155,"children":156},{},[157],{"type":18,"tag":53,"props":158,"children":159},{},[160],{"type":18,"tag":53,"props":161,"children":162},{},[163],{"type":24,"value":164},"还有一个问题，就是大模型的迭代问题，如果想要解决预训练语料的时效性问题和提升大模型的能力，就要对大模型重新进行训练，如果迭代频率高的话，训练成本将变得不可接受，但迭代频率低的话，又将进一步降低大模型的竞争力，在这个问题上，课程学习/持续学习/Adapter等技术可能可以发挥一定的作用。",{"type":18,"tag":26,"props":166,"children":167},{},[168],{"type":18,"tag":53,"props":169,"children":170},{},[171],{"type":18,"tag":53,"props":172,"children":173},{},[174],{"type":24,"value":175},"以上简单介绍了大模型应用中存在的一些挑战，在之后的文章中有机会的话再进一步和大家进行更深入的探讨。",{"title":7,"searchDepth":177,"depth":177,"links":178},4,[],"markdown","content:technology-blogs:zh:1055.md","content","technology-blogs/zh/1055.md","technology-blogs/zh/1055","md",1776506111738]