[{"data":1,"prerenderedAt":306},["ShallowReactive",2],{"content-query-mnGgagjL16":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":300,"_id":301,"_source":302,"_file":303,"_stem":304,"_extension":305},"/news/zh/3659","zh",false,"","昇思携手openEuler打造的DeepSeek全栈开源方案亮相华为伙伴大会","2025年3月20 - 21日，深圳——在华为伙伴大会现场，OpenAtom openEuler（简称&quot;openEuler&quot;）社区联合MindSpore社区以生态共建者身份发布了openEuler与MindSpore DeepSeek全栈开源方案，此次发布标志着全栈开源方案发展成熟。现场演示&参与展台吸引了超100家行业龙头企业代表及产业专家围观。","2025-03-25","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/28/930573f5e3bc4708a48afadecdec745c.png","news",{"type":14,"children":15,"toc":297},"root",[16,24,30,38,43,48,53,71,76,86,91,107,112,120,127,132,140,145,150,158,163,168,176,181,186,194,199,204,212,217,222,230,235,240,245,253,258,265,270,278,290],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思携手openeuler打造的deepseek全栈开源方案亮相华为伙伴大会",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"2025年3月20 - 21日，深圳——在华为伙伴大会现场，OpenAtom openEuler（简称\"openEuler\"）社区联合MindSpore社区以生态共建者身份发布了openEuler与MindSpore DeepSeek全栈开源方案，此次发布标志着全栈开源方案发展成熟。现场演示&参与展台吸引了超100家行业龙头企业代表及产业专家围观。",{"type":17,"tag":25,"props":31,"children":32},{},[33],{"type":17,"tag":34,"props":35,"children":37},"img",{"alt":7,"src":36},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/28/8e6708cef857424e88ec690d7eef429e.png",[],{"type":17,"tag":25,"props":39,"children":40},{},[41],{"type":23,"value":42},"本次发布的openEuler与MindSpore DeepSeek全栈开源方案，实现了端到端部署时长天级到分钟级，大并发推理吞吐达到1400Tokens/s，DeepSeek大模型推理性能开箱即优。openEuler实现以下核心技术：",{"type":17,"tag":25,"props":44,"children":45},{},[46],{"type":23,"value":47},"**1） Function Call：**实现语言大模型对工具的调用 
；",{"type":17,"tag":25,"props":49,"children":50},{},[51],{"type":23,"value":52},"**2）RAG：**通过检索增强生成构建企业领域知识库；",{"type":17,"tag":25,"props":54,"children":55},{},[56,69],{"type":17,"tag":57,"props":58,"children":59},"strong",{},[60,62,67],{"type":23,"value":61},"3）openEuler ",{"type":17,"tag":57,"props":63,"children":64},{},[65],{"type":23,"value":66},"大模型智能系统",{"type":23,"value":68},":",{"type":23,"value":70}," 通过Function Call 精准选择agent执行，提升任务执行效率；",{"type":17,"tag":25,"props":72,"children":73},{},[74],{"type":23,"value":75},"**4）openEuler 异构融合细粒度感知调度：**感知细粒度异构资源，对业务进行精准协同调度，提升整体推理性能；",{"type":17,"tag":25,"props":77,"children":78},{},[79,84],{"type":17,"tag":57,"props":80,"children":81},{},[82],{"type":23,"value":83},"5",{"type":23,"value":85},"**）vLLM-MindSpore插件：**支持MindSpore原生大模型接入vLLM框架，通过整图编译、量化等能力加速推理。",{"type":17,"tag":25,"props":87,"children":88},{},[89],{"type":23,"value":90},"**6）毕昇异构融合编译：**支持全链路软件栈编译优化和昇腾算子编译优化与融合。",{"type":17,"tag":25,"props":92,"children":93},{},[94,96,105],{"type":23,"value":95},"值得关注的是，此方案正式发布前，openEuler社区、MindSpore社区与北京大学完成了场景验证，首次打通openEuler与MindSpore 
DeepSeek全栈开源推理方案的生产环境部署实践。相关技术细节可浏览《",{"type":17,"tag":97,"props":98,"children":102},"a",{"href":99,"rel":100},"https://mp.weixin.qq.com/s?__biz=MzkyMjYzNjU0Ng==&mid=2247514251&idx=1&sn=d5336b23f2a10d06b6bb2782d1018030&scene=21#wechat_redirect",[101],"nofollow",[103],{"type":23,"value":104},"北京大学联合openEuler与MindSpore发布DeepSeek全栈开源解决方案",{"type":23,"value":106},"》。",{"type":17,"tag":25,"props":108,"children":109},{},[110],{"type":23,"value":111},"01",{"type":17,"tag":25,"props":113,"children":114},{},[115],{"type":17,"tag":57,"props":116,"children":117},{},[118],{"type":23,"value":119},"技术亮点剖析",{"type":17,"tag":25,"props":121,"children":122},{},[123],{"type":17,"tag":34,"props":124,"children":126},{"alt":7,"src":125},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/28/8e3dec0baf024056bbfe46164afe857b.png",[],{"type":17,"tag":25,"props":128,"children":129},{},[130],{"type":23,"value":131},"openEuler与MindSpore DeepSeek全栈开源方案，基于行业发展态势与全栈开源客户潜在需求，本次发布的全栈方案提出了以下技术亮点：",{"type":17,"tag":25,"props":133,"children":134},{},[135],{"type":17,"tag":57,"props":136,"children":137},{},[138],{"type":23,"value":139},"Function Call特性",{"type":17,"tag":25,"props":141,"children":142},{},[143],{"type":23,"value":144},"Function Call是大型语言模型（LLM）API中的革命性功能，它赋予开发者定义自定义函数的能力，使AI能够智能判断何时调用特定函数，并返回结构化数据。开发者通过JSON Schema定义函数名称、参数及类型等，系统智能分析用户输入的自然语言，当检测到需要调用外部功能时，模型自动匹配最合适的函数，生成符合预定格式的参数数据。这项技术彻底改变了传统API交互模式，将自然语言处理与程序化功能调用完美融合。",{"type":17,"tag":25,"props":146,"children":147},{},[148],{"type":23,"value":149},"该全栈开源技术架构中，openEuler与MindSpore DeepSeek基于vLLM支持Function Call特性。openEuler提供安全可靠的异构计算环境，MindSpore实现动态计算图优化，DeepSeek V3/R1模型作为基座处理语义理解。通过异构融合内存、异构融合调度、毕昇异构融合编译与MindSpore算子融合等技术，openEuler 
大模型智能系统利用语言大模型对工具的调用。",{"type":17,"tag":25,"props":151,"children":152},{},[153],{"type":17,"tag":57,"props":154,"children":155},{},[156],{"type":23,"value":157},"RAG检索增强生成特性",{"type":17,"tag":25,"props":159,"children":160},{},[161],{"type":23,"value":162},"检索增强生成（RAG）作为大型语言模型（LLM）领域的一项关键创新，通过将实时检索系统与强大的文本生成能力相结合，有效地克服了传统模型受限于静态知识库的局限性。",{"type":17,"tag":25,"props":164,"children":165},{},[166],{"type":23,"value":167},"这种机制相当于为语言模型配备了一个可以实时更新的“外部大脑”，使得AI系统不仅能够保持其自然语言处理的优势，还能够动态地访问最新且最相关的领域知识。对于企业用户而言，openEuler 大模型智能系统 提供了一个理想的解决方案，可以基于私有领域的数据构建专门的知识库，并将其无缝集成至领先的大规模语言模型中。这使得企业能够迅速搭建起高效的问答系统，确保响应内容既贴合企业的具体需求，又能及时反映最新的行业动态和技术进展。",{"type":17,"tag":25,"props":169,"children":170},{},[171],{"type":17,"tag":57,"props":172,"children":173},{},[174],{"type":23,"value":175},"openEuler 大模型智能系统",{"type":17,"tag":25,"props":177,"children":178},{},[179],{"type":23,"value":180},"Agent是大型语言模型（LLM）的重要应用，Agent根据设定的目标，确定好需要履行特定角色，自主观测感知环境，检索历史记忆以及相关知识，通过推理规划分解任务并确定行动策略，并反馈作用于环境，以达成目标。在这个过程中Agent持续学习，以像人类一样不断进化。",{"type":17,"tag":25,"props":182,"children":183},{},[184],{"type":23,"value":185},"openEuler 大模型智能系统基于大模型构建智能运维、智能调优Agent, 通过推理抽象业务流程特征分解智能运维调优任务：运维流程分解为故障感知、故障定界、故障定位子任务，调优流程分解为数据采集、负载感知、参数推荐、智能调优子任务，通过Function Call精准选择小模型执行子任务，提升任务的执行效率。并且智能运维调优Agent结合故障模式库和调优参数知识库等领域知识，围绕RAG检索增强能力，智能推荐运维手段和调优参数，进一步提升了运维调优效率。",{"type":17,"tag":25,"props":187,"children":188},{},[189],{"type":17,"tag":57,"props":190,"children":191},{},[192],{"type":23,"value":193},"openEuler 异构融合细粒度感知调度",{"type":17,"tag":25,"props":195,"children":196},{},[197],{"type":23,"value":198},"通过实时采集计算节点状态、任务特征及业务优先级等多维度数据，在业务容器中构建动态决策模型，实现CPU核级、内存页级甚至缓存行级的资源划分，突破传统系统调度隔离边界，支持毫秒级资源配额动态调整。针对高并发场景下推理服务、分布式计算组件Host侧资源争用的痛点，利用NUMA感知的细粒度算力与内存资源隔离，降低单并发推理时延；通过线程特性感知的细粒度内存分配、高性能代码段大页机制，在控制内存开销的同时，提升Host侧性能与整体推理吞吐。",{"type":17,"tag":25,"props":200,"children":201},{},[202],{"type":23,"value":203},"针对MoE大模型数据并行不均衡与稀疏访存效特点，系统通过实时采集节点算力与设计拓扑结构，优先将算子下发进程映射到相应的NPU节点，减少跨NUMA通信开销。进一步通过Host/Device 
OS协同内存管理实现多粒度动态混合页、按需内存分配，减少页表访存开销同时提升显存利用效率，进而推高大并发推理吞吐。",{"type":17,"tag":25,"props":205,"children":206},{},[207],{"type":17,"tag":57,"props":208,"children":209},{},[210],{"type":23,"value":211},"MindSpore 图编译&量化&vLLM插件",{"type":17,"tag":25,"props":213,"children":214},{},[215],{"type":23,"value":216},"为了获得极致的DeepSeek-V3/R1推理性能，MindSpore通过Jit编译的方式将模型实时编译成计算图，通过模式匹配自动寻优Cube-Vector计算，Vector-Vector计算、通信-计算等多类型算子组合的融合策略。相比于单一算子，计算类算子融合可显著降低数据的存取耗时，通信-计算类融合可有效压缩通信气泡。",{"type":17,"tag":25,"props":218,"children":219},{},[220],{"type":23,"value":221},"为了适配vLLM框架，昇思MindSpore开发了vLLM-MindSpore插件，无缝支持了vLLM框架的Continuous Batching、Chunked Prefill等核心特性，并通过Multi-Step Scheduling缓解了服务调度时延瓶颈。",{"type":17,"tag":25,"props":223,"children":224},{},[225],{"type":17,"tag":57,"props":226,"children":227},{},[228],{"type":23,"value":229},"毕昇编译优化&异构融合编译",{"type":17,"tag":25,"props":231,"children":232},{},[233],{"type":23,"value":234},"毕昇编译器通过架构亲和优化、循环优化、多级并行优化、指令优化、智能编译选项和链接时优化等编译技术，能够显著提升ARM 架构（尤其鲲鹏处理器）上的应用性能。在openEuler与MindSpore DeepSeek全栈开源方案中，使用llvm for openEuler针对算子下发阶段的性能瓶颈，通过CFGO优化、选项调优和链接时优化等技术优化Python、Mindspore和Ray等应用，使代码布局更优，有效提高程序IPC；通过架构亲和的原子指令优化和Malloc、Memcpy/Memset高性能库优化，提高内存利用效率，降低访存开销，进而降低时延，提高吞吐率。",{"type":17,"tag":25,"props":236,"children":237},{},[238],{"type":23,"value":239},"毕昇异构融合算子优化技术为Multi-Step 
Scheduling等特性支持上，昇腾侧算子快速生成与编译支持，满足特性快速上线与开箱性能保证。结合Mindspore图编译，使能Vector-Vector、Cube-Vector、通信-计算等多类型融合算子的生成与编译优化。",{"type":17,"tag":25,"props":241,"children":242},{},[243],{"type":23,"value":244},"02",{"type":17,"tag":25,"props":246,"children":247},{},[248],{"type":17,"tag":57,"props":249,"children":250},{},[251],{"type":23,"value":252},"未来蓝图披露",{"type":17,"tag":25,"props":254,"children":255},{},[256],{"type":23,"value":257},"基于目前AI行业发展态势与开源软件客户潜在需求，openEuler全栈开源方案已规划出清晰的技术演进方向。通过下图所示技术路线，将在异构融合调度、DDR/HBM内存池、算子融合优化及异构编译器等方面实现持续突破。",{"type":17,"tag":25,"props":259,"children":260},{},[261],{"type":17,"tag":34,"props":262,"children":264},{"alt":7,"src":263},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/28/c493f827e2b04e1592d307584c72ede2.png",[],{"type":17,"tag":25,"props":266,"children":267},{},[268],{"type":23,"value":269},"03",{"type":17,"tag":25,"props":271,"children":272},{},[273],{"type":17,"tag":57,"props":274,"children":275},{},[276],{"type":23,"value":277},"产业生态共建",{"type":17,"tag":25,"props":279,"children":280},{},[281,283],{"type":23,"value":282},"openEuler社区已面向开发者开源核心技术方案，诚邀行业伙伴、高校与个人开发者交流合作方案，通过联合创新实验室加速场景落地。可添加小助手微信加入 SIG-Long微信交流群，或访问Gitee平台了解相关材料、提交issue 
（",{"type":17,"tag":97,"props":284,"children":287},{"href":285,"rel":286},"https://gitee.com/openeuler/llm_solution",[101],[288],{"type":23,"value":289},"https://gitee.com/openeuler/llm_solution），与openEuler、MindSpore社区专家共筑智能未来。",{"type":17,"tag":25,"props":291,"children":292},{},[293],{"type":17,"tag":34,"props":294,"children":296},{"alt":7,"src":295},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/28/458985de9bdb4117b998fa055d37b11e.png",[],{"title":7,"searchDepth":298,"depth":298,"links":299},4,[],"markdown","content:news:zh:3659.md","content","news/zh/3659.md","news/zh/3659","md",1776506086702]