[{"data":1,"prerenderedAt":231},["ShallowReactive",2],{"content-query-nGqVHXbhCY":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":225,"_id":226,"_source":227,"_file":228,"_stem":229,"_extension":230},"/news/zh/3805","zh",false,"","昇思MindSpore 2.7版本首发“焕新社区” ——全面升级超大规模集群大模型训推技术","昇思开源社区携手中移九天人工智能科技（北京）有限公司宣布昇思MindSpore AI框架2.7版本将同步在社区首发","2025-07-28","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/08/01/891599fd85284b548a2b80dc2f575fb2.png","news",{"type":14,"children":15,"toc":222},"root",[16,24,30,38,43,48,64,69,77,92,97,105,123,131,149,163,168,173,178,183,188,202,207,212,217],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思mindspore-27版本首发焕新社区-全面升级超大规模集群大模型训推技术",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"【上海，2025年7月27日，世界人工智能大会2025】7月26日，在2025世界人工智能大会“AI焕新 产业共赢”企业人工智能产业发展论坛上，由国务院国资委统筹推进，中国移动牵头建设，聚合央企合力和产业链力量共同打造的人工智能“焕新社区”正式发布。同期，昇思开源社区携手中移九天人工智能科技（北京）有限公司（九天人工智能研究院），宣布昇思MindSpore AI框架2.7版本将同步在社区首发，该版本代码也将上传至昇思MindSpore代码仓托管，并同步在社区设置链接。",{"type":17,"tag":25,"props":31,"children":32},{},[33],{"type":17,"tag":34,"props":35,"children":37},"img",{"alt":7,"src":36},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/08/01/0cd85bb75ce34af8b2230a9398f6c32b.png",[],{"type":17,"tag":25,"props":39,"children":40},{},[41],{"type":23,"value":42},"昇思MindSpore AI框架作为使能大模型开发全流程的基础软件底座，本次升级聚焦大模型训练效率、推理性能及系统可视化三大领域，通过20余项技术创新，实现稀疏专家模型（MoE）训练性能提升70%、推理吞吐提升15%，为全球开发者提供更高效的AI全流程开发体验。",{"type":17,"tag":25,"props":44,"children":45},{},[46],{"type":23,"value":47},"中移九天人工智能科技（北京）有限公司（九天人工智能研究院）与昇思MindSpore深度协同，在大模型训练领域持续创新，完成了超大规模集群训练多维混合并行优化，包括支持drop&dropless MOE架构、新增流水序列并行等，并在强化学习后训练、训练精度监控和故障恢复等多领域创新，助力移动完成九天系列多个模型训练，训练效率提升超过30%。围绕模型-框架的双向互动实现基础软件与大模型技术迭代，促进AI软件技术不断演进，本次发布的MindSpore 
2.7版本实现以下几大方面能力升级：",{"type":17,"tag":25,"props":49,"children":50},{},[51,57,59],{"type":17,"tag":52,"props":53,"children":54},"strong",{},[55],{"type":23,"value":56},"# 01",{"type":23,"value":58}," ",{"type":17,"tag":52,"props":60,"children":61},{},[62],{"type":23,"value":63},"MoE训练架构全面进化",{"type":17,"tag":25,"props":65,"children":66},{},[67],{"type":23,"value":68},"为应对MoE模型复杂并行策略挑战，MindSpore 2.7版本推出三大训练技术创新：",{"type":17,"tag":25,"props":70,"children":71},{},[72],{"type":17,"tag":52,"props":73,"children":74},{},[75],{"type":23,"value":76},"1、双模式负载均衡：",{"type":17,"tag":78,"props":79,"children":80},"ul",{},[81,87],{"type":17,"tag":82,"props":83,"children":84},"li",{},[85],{"type":23,"value":86},"Capacity模式：固定专家负载，通过Pad/Drop统一计算形状，提升训练效率；",{"type":17,"tag":82,"props":88,"children":89},{},[90],{"type":23,"value":91},"Dropless模式：动态分配通信计算资源，增强系统稳定性；",{"type":17,"tag":25,"props":93,"children":94},{},[95],{"type":23,"value":96},"2、自定义灵活并行：基于框架静态图动态Shape能力和sharding自动重排布能力，新增Morph自定义并行机制，灵活支持专家并行等复杂并行模式；",{"type":17,"tag":25,"props":98,"children":99},{},[100],{"type":17,"tag":52,"props":101,"children":102},{},[103],{"type":23,"value":104},"3、多层次性能优化：",{"type":17,"tag":78,"props":106,"children":107},{},[108,113,118],{"type":17,"tag":82,"props":109,"children":110},{},[111],{"type":23,"value":112},"通信层：机间零冗余通信、亲和性绑核优化；",{"type":17,"tag":82,"props":114,"children":115},{},[116],{"type":23,"value":117},"计算层：FA算子融合、双流水线掩盖（1b1f+DualPipeV）消除流水线气泡；",{"type":17,"tag":82,"props":119,"children":120},{},[121],{"type":23,"value":122},"内存层：细粒度D2H 
Swap机制提升内存利用率，支撑千亿模型训练",{"type":17,"tag":25,"props":124,"children":125},{},[126],{"type":17,"tag":52,"props":127,"children":128},{},[129],{"type":23,"value":130},"4、后训练强化学习能力提升：",{"type":17,"tag":78,"props":132,"children":133},{},[134,139,144],{"type":17,"tag":82,"props":135,"children":136},{},[137],{"type":23,"value":138},"支持使用GRPO训练移动九天、Qwen2.5-7B/32B等模型",{"type":17,"tag":82,"props":140,"children":141},{},[142],{"type":23,"value":143},"支持2k~32k长序列正常运行，支持全量DeepSeekV3强化学习训练",{"type":17,"tag":82,"props":145,"children":146},{},[147],{"type":23,"value":148},"实现训推共卡、支持动态packing训练、推理均衡采样、细粒度H2D/D2H swap、6D并行权重重排、断点续训等特性",{"type":17,"tag":25,"props":150,"children":151},{},[152,157,158],{"type":17,"tag":52,"props":153,"children":154},{},[155],{"type":23,"value":156},"# 02",{"type":23,"value":58},{"type":17,"tag":52,"props":159,"children":160},{},[161],{"type":23,"value":162},"大模型推理性能突破",{"type":17,"tag":25,"props":164,"children":165},{},[166],{"type":23,"value":167},"针对SOTA模型（如DeepSeek-V3/R1、Qwen3）的推理场景，2.7版本通过四维技术重构：",{"type":17,"tag":25,"props":169,"children":170},{},[171],{"type":23,"value":172},"1、融合算子革新：新增MoeInitRoutingQuant、MultiLatentAttention等稀疏计算专用算子，结合Combine/Dispatch通信优化算子，降低专家路由开销40%；",{"type":17,"tag":25,"props":174,"children":175},{},[176],{"type":23,"value":177},"2、vLLM深度适配：升级支持vLLM v0.8.3架构，集成Prefix Caching（前缀缓存）、Chunked Prefill（分块预填充）等特性；",{"type":17,"tag":25,"props":179,"children":180},{},[181],{"type":23,"value":182},"3、极致量化压缩：新增W8A8静态量化（性能+5%）与W4A16量化方案，首次实现在单台Atlas 800I A2（64GB）服务器+MindSpore环境下部署DeepSeek 671B模型；",{"type":17,"tag":25,"props":184,"children":185},{},[186],{"type":23,"value":187},"4、混合并行推理：支持TP+DP+EP并行与MTP（多Token预测）投机推理，多请求吞吐性能跃升35%+。",{"type":17,"tag":25,"props":189,"children":190},{},[191,196,197],{"type":17,"tag":52,"props":192,"children":193},{},[194],{"type":23,"value":195},"# 
03",{"type":23,"value":58},{"type":17,"tag":52,"props":198,"children":199},{},[200],{"type":23,"value":201},"在线监测工具实现训练性能“透视”",{"type":17,"tag":25,"props":203,"children":204},{},[205],{"type":23,"value":206},"针对超大集群训练痛点，升级MindSpore Profiler技术。在大集群训练场景下，用户希望在训练过程中实时看到模型性能瓶颈点，因此需要在线监测能力。",{"type":17,"tag":25,"props":208,"children":209},{},[210],{"type":23,"value":211},"MindSpore 2.7版本新增了MindSpore Profiler接入在线监测平台功能，用户在集群训练场景下使用MindSpore Profiler时，能够通过平台的monitor功能（常态监测）实时观察到训练的性能劣化点，实现性能问题的初步定位，后续可以通过平台的npu trace dump功能（精准采集）采集完整的性能数据，分析、定位性能瓶颈点。",{"type":17,"tag":25,"props":213,"children":214},{},[215],{"type":23,"value":216},"通过“常态监测+精准采集”的组合策略，该技术既能满足集群长稳训练时的实时监测需求，又能针对性能瓶颈进行定向分析，显著提升模型训练效率。",{"type":17,"tag":25,"props":218,"children":219},{},[220],{"type":23,"value":221},"未来，昇思MindSpore将继续坚持开源开放，使能九天人工智能研究院加速开展大模型创新，并携手焕新社区等伙伴，共筑人工智能产业繁荣生态。",{"title":7,"searchDepth":223,"depth":223,"links":224},4,[],"markdown","content:news:zh:3805.md","content","news/zh/3805.md","news/zh/3805","md",1776506089749]