[{"data":1,"prerenderedAt":226},["ShallowReactive",2],{"content-query-MflPxkdtkR":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":220,"_id":221,"_source":222,"_file":223,"_stem":224,"_extension":225},"/news/zh/2696","zh",false,"","杭州人工智能计算中心率先实现基于昇思MindSpore的LLaMa2、Stablediffusion2.1大模型训练部署","杭州人工智能计算中心成功实现LLaMA2、Stablediffusion2.1等主流大模型上线，是全国首个在自主可控环境及自主可控AI框架下成功上线的人工智能计算中心。","2023-08-11","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2023/08/28/0beedcfd4d3044f3b830d4d21b3e76ca.png","news",{"type":14,"children":15,"toc":217},"root",[16,24,30,35,44,49,56,65,70,75,80,85,90,95,102,110,115,120,125,130,135,142,147,154,159,167,177,182,189,194,202,207,212],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"杭州人工智能计算中心率先实现基于昇思mindspore的llama2stablediffusion21大模型训练部署",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"杭州人工智能计算中心成功实现LLaMA2、Stablediffusion2.1等主流大模型上线，是全国首个在自主可控环境及自主可控AI框架下成功上线的人工智能计算中心。昇思MindSpore是华为开源的首个业界全场景AI融合框架，原生支持AI大模型与科学智能。杭州人工智能计算中心基于昇思，以及大模型开发套件MindSpore Transformer将LLaMa2 
70B、Stablediffusion2.1大模型适配到本地环境中，实现高效运行。",{"type":17,"tag":25,"props":31,"children":32},{},[33],{"type":23,"value":34},"为方便开发者群体高效创新，杭州人工智能计算中心上线了昇思MindSpore全流程使能大模型的工具与方法，从开发、训练、微调到部署，覆盖低参微调算法、分布式推理，让开发者有更多样化的选择快速打造大模型",{"type":17,"tag":25,"props":36,"children":37},{},[38],{"type":17,"tag":39,"props":40,"children":43},"img",{"alt":41,"src":42},"image.png","https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230816033554.64544897987899959397066330144112:50540827014116:2400:9B22B5DD6C1D9939A57947D6ABE43F7CC434B0CB9383A9CF8844D74003A3E007.png",[],{"type":17,"tag":25,"props":45,"children":46},{},[47],{"type":23,"value":48},"在杭州AICC环境下，开发者可以通过notebook调试，通过训练作业拉起模型训练，几百张卡的分布式训练一键拉起，省去一台台搭建裸机环境的步骤。",{"type":17,"tag":25,"props":50,"children":51},{},[52],{"type":17,"tag":39,"props":53,"children":55},{"alt":41,"src":54},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230816033612.66784156368174277869342640883118:50540827014116:2400:19C160D2230DD7ECCB472A62B94B206CA09B4B28AAC45BE454A957E4D084E9C5.png",[],{"type":17,"tag":25,"props":57,"children":58},{},[59],{"type":17,"tag":60,"props":61,"children":62},"strong",{},[63],{"type":23,"value":64},"LLaMa2",{"type":17,"tag":25,"props":66,"children":67},{},[68],{"type":23,"value":69},"Llama 2包含了70亿、130亿和700亿参数的模型。Meta正式发布Llama 2后，Llama 2强大的参数规模以及性能表现，对OpenAI带来了不小的压力，人称“GPT-4最强平替”。",{"type":17,"tag":25,"props":71,"children":72},{},[73],{"type":23,"value":74},"Llama-2相比Llama-1有不少技术层面的改进，从而带来了模型性能、推理效率以及安全性等方面的有效提升。具体而言，重要的改进有以下几点：",{"type":17,"tag":25,"props":76,"children":77},{},[78],{"type":23,"value":79},"1、模型架构上使用Group-Query-Attention(GQA)来提高模型推理效率，语境长度从2K增加一倍到4K。",{"type":17,"tag":25,"props":81,"children":82},{},[83],{"type":23,"value":84},"2、预训练语料从1.4T tokens增加到2T 
tokens。",{"type":17,"tag":25,"props":86,"children":87},{},[88],{"type":23,"value":89},"3、在监督微调（SFT）阶段更加注重数据集质量，使用更少但质量更高的SFT数据相比使用百万量级的公开SFT数据，效果显著提升。",{"type":17,"tag":25,"props":91,"children":92},{},[93],{"type":23,"value":94},"4、引入了三项安全训练技术Supervised Safety Fine-Tuning、Safety RLHF、Safety Context Distillation 提升模型的安全性。",{"type":17,"tag":25,"props":96,"children":97},{},[98],{"type":17,"tag":39,"props":99,"children":101},{"alt":41,"src":100},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230816033644.80088510340323848286280631476155:50540827014116:2400:B02B76DCD5725B5982FAADF6C5B7828811A15671B01191B64D35364FBC1E9B10.png",[],{"type":17,"tag":25,"props":103,"children":104},{},[105],{"type":17,"tag":60,"props":106,"children":107},{},[108],{"type":23,"value":109},"Stablediffusion2.1",{"type":17,"tag":25,"props":111,"children":112},{},[113],{"type":23,"value":114},"Stable diffusion是一个基于Latent Diffusion Models（潜在扩散模型，LDMs）的文图生成（text-to-image）模型. 具体来说，Stable Diffusion采用 UNetModel 这种 Encoder-Decoder 结构来实现扩散的过程. 
在Stable Diffusion中，通过将人类语言转换成机器能理解的数学向量，再结合语义向量，从纯噪声开始逐步去除噪声，生成图片信息隐变量，最后将图片信息隐变量转换为一张真正的图片。",{"type":17,"tag":25,"props":116,"children":117},{},[118],{"type":23,"value":119},"经迁移及优化后，sd2在aicc上支持了全量训练，lora训练，以及flashattention优化。",{"type":17,"tag":25,"props":121,"children":122},{},[123],{"type":23,"value":124},"Flashattention作为优化显存的工具，有效地提高了训练batch_size 数量。",{"type":17,"tag":25,"props":126,"children":127},{},[128],{"type":23,"value":129},"以sd2.0为例",{"type":17,"tag":25,"props":131,"children":132},{},[133],{"type":23,"value":134},"未开启flashattention",{"type":17,"tag":25,"props":136,"children":137},{},[138],{"type":17,"tag":39,"props":139,"children":141},{"alt":41,"src":140},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230816033706.72324250909870802231206973791490:50540827014116:2400:714346717206D18504D865113046B7F4D65D17345C08908453EC17EE4BF6C0BB.png",[],{"type":17,"tag":25,"props":143,"children":144},{},[145],{"type":23,"value":146},"开启flashattention后",{"type":17,"tag":25,"props":148,"children":149},{},[150],{"type":17,"tag":39,"props":151,"children":153},{"alt":41,"src":152},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230816033725.25527104964909870161034050078570:50540827014116:2400:3EDF0A97404FB9F9588DF62CB8E3540D34EC0A98F9920CFB743022819EE602F9.png",[],{"type":17,"tag":25,"props":155,"children":156},{},[157],{"type":23,"value":158},"提升了约60%的数据并发量，耗时仅增加10%。",{"type":17,"tag":25,"props":160,"children":161},{},[162],{"type":17,"tag":60,"props":163,"children":164},{},[165],{"type":23,"value":166},"推理效果↓",{"type":17,"tag":25,"props":168,"children":169},{},[170],{"type":17,"tag":60,"props":171,"children":172},{},[173],{"type":17,"tag":39,"props":174,"children":176},{"alt":41,"src":175},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230816033744.816747737
70725207800968163685262:50540827014116:2400:E143A56340BDD4CC8724DE4CC3C06FDE96079D2A603F5EE8AB1CF202DDA58139.png",[],{"type":17,"tag":25,"props":178,"children":179},{},[180],{"type":23,"value":181},"关键词：克苏鲁和红月",{"type":17,"tag":25,"props":183,"children":184},{},[185],{"type":17,"tag":39,"props":186,"children":188},{"alt":41,"src":187},"https://fileserver.developer.huaweicloud.com/FileServer/getFile/cmtybbs/e64/154/b38/90a1d5d431e64154b387b3660e356ff5.20230816033801.86345364322238357584266996558716:50540827014116:2400:D64C47F8CDFED66FA272AE8BFE081F8F1DBE920F97FE82628EBA915D729702A3.png",[],{"type":17,"tag":25,"props":190,"children":191},{},[192],{"type":23,"value":193},"关键词：河边看书的女孩",{"type":17,"tag":25,"props":195,"children":196},{},[197],{"type":17,"tag":60,"props":198,"children":199},{},[200],{"type":23,"value":201},"杭州人工智能计算中心",{"type":17,"tag":25,"props":203,"children":204},{},[205],{"type":23,"value":206},"杭州人工智能计算中心是全市首个公共服务性质的人工智能算力基础设施，致力于打造人工智能产业和数字经济的底座。由杭州高新区（滨江）管委会、政府投资，杭州高新智能科技有限公司建设与运营。一二期面积2700平方米，规模140P，并将逐步扩容。计算中心采用自主可控的昇腾AI集群，为政府、企业、科研机构、高校等提供公共普惠算力。该中心已于2022年5月20日在杭州市滨江区海创基地上线，提供AI模型训练、应用孵化、产业推广和人才培养等服务，助力杭州国家新一代人工智能创新发展试验区、应用先导区建设。",{"type":17,"tag":25,"props":208,"children":209},{},[210],{"type":23,"value":211},"相关问题请咨询：",{"type":17,"tag":25,"props":213,"children":214},{},[215],{"type":23,"value":216},"陈先生 19550207033 / 0571-56101725",{"title":7,"searchDepth":218,"depth":218,"links":219},4,[],"markdown","content:news:zh:2696.md","content","news/zh/2696.md","news/zh/2696","md",1776506071106]