[{"data":1,"prerenderedAt":723},["ShallowReactive",2],{"content-query-HdpVw0TGmf":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":717,"_id":718,"_source":719,"_file":720,"_stem":721,"_extension":722},"/news/zh/3639","zh",false,"","携手北京大学，MindSpore+openEuler打造支持vLLM的DeepSeek全栈开源推理方案","2025年，以DeepSeek-R1为代表的AI大模型正以惊人的速度重塑产业格局。","2025-03-09","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/14/70a39ef8de3a4135b63714fc0bea385a.png","news",{"type":14,"children":15,"toc":692},"root",[16,24,30,39,51,63,75,83,98,109,120,139,149,187,195,205,210,215,224,229,234,243,248,257,314,333,452,461,466,475,480,487,496,508,517,537,546,556,566,574,582,591,596,604,609,617,624,629,638,647,652,661,666,671,680,685],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"携手北京大学mindsporeopeneuler打造支持vllm的deepseek全栈开源推理方案",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"2025年，以DeepSeek-R1为代表的AI大模型正以惊人的速度重塑产业格局。短短7天用户破亿、多模态交互与低算力需求突破硬件限制，这些成就印证了AI技术走向规模落地的临界点已至。然而，将AI融入到具体产业，还面临着一些问题：",{"type":17,"tag":25,"props":31,"children":32},{},[33],{"type":17,"tag":34,"props":35,"children":36},"strong",{},[37],{"type":23,"value":38},"从产业上看：",{"type":17,"tag":25,"props":40,"children":41},{},[42,44,49],{"type":23,"value":43},"1、",{"type":17,"tag":34,"props":45,"children":46},{},[47],{"type":23,"value":48},"算力与模型的割裂",{"type":23,"value":50},"：厂商需为不同硬件重复适配模型，开发成本陡增；",{"type":17,"tag":25,"props":52,"children":53},{},[54,56,61],{"type":23,"value":55},"2、",{"type":17,"tag":34,"props":57,"children":58},{},[59],{"type":23,"value":60},"生态孤岛化",{"type":23,"value":62},"：各厂商自建技术栈，导致跨平台协作效率低下；",{"type":17,"tag":25,"props":64,"children":65},{},[66,68,73],{"type":23,"value":67},"3、",{"type":17,"tag":34,"props":69,"children":70},{},[71],{"type":23,"value":72},"长尾需求难满足",{"type":23,"value":74},"：中小开发者受限于算力与框架兼容性，难以复用头部模型能力。",{"type":17
,"tag":25,"props":76,"children":77},{},[78],{"type":17,"tag":34,"props":79,"children":80},{},[81],{"type":23,"value":82},"从技术上看：",{"type":17,"tag":25,"props":84,"children":85},{},[86,88,96],{"type":23,"value":87},"1、****混合专家（",{"type":17,"tag":34,"props":89,"children":90},{},[91],{"type":17,"tag":34,"props":92,"children":93},{},[94],{"type":23,"value":95},"MoE）架构的适配性挑战",{"type":23,"value":97},"：专家模型与硬件内存存在匹配困境，同时专家负载不均与通信开销过高；",{"type":17,"tag":25,"props":99,"children":100},{},[101,102,107],{"type":23,"value":55},{"type":17,"tag":34,"props":103,"children":104},{},[105],{"type":23,"value":106},"多模型协同与训推一体的系统挑战",{"type":23,"value":108},"：多模型动态交互、训推状态切换、资源动态分配引发协同困难，训推一体化软件栈的易用性不足；",{"type":17,"tag":25,"props":110,"children":111},{},[112,113,118],{"type":23,"value":67},{"type":17,"tag":34,"props":114,"children":115},{},[116],{"type":23,"value":117},"长序列推理与稀疏计算的性能挑战",{"type":23,"value":119},"：长序列KV Cache存在容量瓶颈；稀疏计算引发的向量化效率下降。",{"type":17,"tag":25,"props":121,"children":122},{},[123,125,130,132,137],{"type":23,"value":124},"DeepSeek引发的挑战，本质上是AI规模化落地的必经之痛。解决这些难题需硬件厂商、框架开发者与行业用户深度协同，通过",{"type":17,"tag":34,"props":126,"children":127},{},[128],{"type":23,"value":129},"全栈开放生态共建",{"type":23,"value":131},"与",{"type":17,"tag":34,"props":133,"children":134},{},[135],{"type":23,"value":136},"分层协同性能提升",{"type":23,"value":138},"，实现从单点突破到系统级效能跃迁。",{"type":17,"tag":140,"props":141,"children":143},"h3",{"id":142},"_01全栈开放生态共建",[144],{"type":17,"tag":34,"props":145,"children":146},{},[147],{"type":23,"value":148},"# 01****全栈开放生态共建",{"type":17,"tag":25,"props":150,"children":151},{},[152,157,159,164,166,171,173,178,180,185],{"type":17,"tag":34,"props":153,"children":154},{},[155],{"type":23,"value":156},"北京大学联合OpenAtom openEuler（简称\"openEuler\") 
开源社区与MindSpore社区",{"type":23,"value":158},"，推出面向大模型的全栈开源方案，以",{"type":17,"tag":34,"props":160,"children":161},{},[162],{"type":23,"value":163},"操作系统+AI框架+模型生态",{"type":23,"value":165},"的三层开放架构，替换",{"type":17,"tag":34,"props":167,"children":168},{},[169],{"type":23,"value":170},"操作系统",{"type":23,"value":172},"和",{"type":17,"tag":34,"props":174,"children":175},{},[176],{"type":23,"value":177},"DL框架",{"type":23,"value":179},"，秉承",{"type":17,"tag":34,"props":181,"children":182},{},[183],{"type":23,"value":184},"代码开源+标准开放+生态共建",{"type":23,"value":186},"的理念，逐步成为智能时代的全国产化的数字基座。",{"type":17,"tag":25,"props":188,"children":189},{},[190],{"type":17,"tag":191,"props":192,"children":194},"img",{"alt":7,"src":193},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/14/2a6c79423b4149d69d9395b135770684.png",[],{"type":17,"tag":196,"props":197,"children":199},"h4",{"id":198},"_1对上兼容多元大模型生态普惠ai",[200],{"type":17,"tag":34,"props":201,"children":202},{},[203],{"type":23,"value":204},"1****对上：兼容多元大模型生态，普惠AI",{"type":17,"tag":25,"props":206,"children":207},{},[208],{"type":23,"value":209},"支持DeepSeek、LLaMA系列等主流模型接入，通过归一化的开源推理软件栈，保证不同模型对资源的动态调配，在生态上做到统一演进，且开箱即优，避免“重复造轮子”；",{"type":17,"tag":25,"props":211,"children":212},{},[213],{"type":23,"value":214},"集成模型微调与蒸馏能力，优化增强RAG流程搭建，结合DeepSeek群体策略优化经验，降低长尾场景定制门槛。",{"type":17,"tag":196,"props":216,"children":218},{"id":217},"_2对下异构算力无缝接入ai基座",[219],{"type":17,"tag":34,"props":220,"children":221},{},[222],{"type":23,"value":223},"2****对下：异构算力无缝接入，AI基座",{"type":17,"tag":25,"props":225,"children":226},{},[227],{"type":23,"value":228},"通过硬件抽象层兼容GPU、NPU及国产芯片，释放DeepSeek低能耗技术红利；",{"type":17,"tag":25,"props":230,"children":231},{},[232],{"type":23,"value":233},"在极致资源约束下，资源动态调度诉求强烈，通过全栈协同优化降低模型资源消耗获得竞争优势。",{"type":17,"tag":140,"props":235,"children":237},{"id":236},"_02分层协同性能提升",[238],{"type":17,"tag":34,"props":239,"children":240},{},[241],{"type":23,"value":242},"# 
02****分层协同性能提升",{"type":17,"tag":25,"props":244,"children":245},{},[246],{"type":23,"value":247},"通过openEuler、MindSpore与vLLM/RAY间的分层协同，为DeepSeek-R1大模型带来了吞吐性能与易用性的显著提升。核心技术点如下：",{"type":17,"tag":196,"props":249,"children":251},{"id":250},"_1openeuler",[252],{"type":17,"tag":34,"props":253,"children":254},{},[255],{"type":23,"value":256},"1****openEuler",{"type":17,"tag":25,"props":258,"children":259},{},[260,265,267,272,274,279,281,286,288,312],{"type":17,"tag":34,"props":261,"children":262},{},[263],{"type":23,"value":264},"异构融合调度：负载感知MoE冷热专家，任务细粒度调度提升推理性能",{"type":23,"value":266}," 负载感知的冷热MoE专家动态识别和并行调度，稀疏MoE计算分层细粒度拆分到不同进程部署在多样算力； 共享资源细粒度按需控制，支持MoE专家均衡调度，计算/通信细粒度并发； 针对高并发场景下推理服务、分布式计算组件Host侧资源争用的痛点，利用NUMA感知的细粒度算力与内存资源隔离，提升推理整体性能。 ",{"type":17,"tag":34,"props":268,"children":269},{},[270],{"type":23,"value":271},"异构融合内存：高效管理异构内存，减小系统内存碎片，提升系统推理性能",{"type":23,"value":273}," 针对推理服务高并发场景，通过线程特性感知的细粒度内存分配、高性能代码段大页机制，在控制内存开销的同时，提升Host侧性能与整体推理吞吐； 针对MoE架构的稀疏访存特征，通过Host/Device协同内存管理实现多粒度动态混合页与按需内存分配，减少页表访存开销同时提升显存利用效率； 针对大模型推理服务面临的显存容量挑战，基于MoE架构的稀疏计算特征，利用运行时-OS协同设计实现高效专家超分部署，提升显存利用率与整体推理吞吐。 ",{"type":17,"tag":34,"props":275,"children":276},{},[277],{"type":23,"value":278},"异构融合编译：毕昇编译优化，减少算子下发耗时，提升算子性能",{"type":23,"value":280}," ",{"type":17,"tag":34,"props":282,"children":283},{},[284],{"type":23,"value":285},"架构亲和编译优化",{"type":23,"value":287},"：通过架构亲和的原子指令优化和Malloc、Memcpy高性能库优化，降低各类锁的代价，提高内存利用效率，降低访存开销，进而降低时延，提高吞吐率；算子编译阶段使能智能感知流水优化，基于数据依赖关系深度分析和自适应同步决策机制，自动插入最优同步指令实现高效的多级流水并行；通过昇腾算子抽象层与芯片ISA的智能映射，实现指令级并行优化，极大发挥芯片理论算力； 
",{"type":17,"tag":34,"props":289,"children":290},{},[291],{"type":17,"tag":34,"props":292,"children":293},{},[294,296,310],{"type":23,"value":295},"多维融合",{"type":17,"tag":34,"props":297,"children":298},{},[299],{"type":17,"tag":34,"props":300,"children":301},{},[302],{"type":17,"tag":34,"props":303,"children":304},{},[305],{"type":17,"tag":34,"props":306,"children":307},{},[308],{"type":23,"value":309},"编译",{"type":23,"value":311},"优化",{"type":23,"value":313},"：针对算子下发阶段前端性能瓶颈较高的特点，通过CFGO优化技术，借助运行时信息，编译器进行精准的代码布局优化，有效提高程序IPC，降低算子下发时延；多维融合加速能够自动实现向量类算子融合、矩阵-向量类算子融合，减少数据搬运开销，并通过细粒度并行进一步提升算子性能，快速满足用户验证模型算法和提升模型开箱性能。",{"type":17,"tag":196,"props":315,"children":317},{"id":316},"_2mindspore图编译将模型编译为计算图通过模式匹配自动将小算子融为大算子",[318],{"type":17,"tag":34,"props":319,"children":320},{},[321,323,331],{"type":23,"value":322},"2",{"type":17,"tag":34,"props":324,"children":325},{},[326],{"type":17,"tag":34,"props":327,"children":328},{},[329],{"type":23,"value":330},"MindSpore",{"type":23,"value":332},"图编译：将模型编译为计算图，通过模式匹配自动将小算子融为大算子",{"type":17,"tag":25,"props":334,"children":335},{},[336,341,343,348,350,355,357,365,367,381,383,397,399,413,415,429,431,436,438,443,445,450],{"type":17,"tag":34,"props":337,"children":338},{},[339],{"type":23,"value":340},"图生成",{"type":23,"value":342},"：MindSpore通过JIT编译自动将模型的python类或者函数编译成一张完整的计算图，JIT编译提供了多种方式(ast/bytecode/trace）以满足不同场景的用途，覆盖了绝大部分Python语法。 ",{"type":17,"tag":34,"props":344,"children":345},{},[346],{"type":23,"value":347},"自动融合",{"type":23,"value":349},"：基于计算图通过自动模式匹配实现算子融合，将小算子融合成大颗粒的算子。大算子既减少Host下发的开销，同时也大大缩短了Device的计算时延。在DeepSeek V3/R1模型中实现了QKV/FFN+Split融合、Transpose+BatchMatMul+Transpose融合、Swiglu融合以及Norm类融合，大幅度减少了算子数量。 ",{"type":17,"tag":34,"props":351,"children":352},{},[353],{"type":23,"value":354},"动态shape支持",{"type":23,"value":356},"：计算图的执行需要支持动态shape以满足推理场景输入输出序列长度以及batch 
size的动态变化，相比于静态shape的整图下沉，动态shape的计算图执行需要每个iteration在Host侧重新执行shape推导以及申请显存等操作，为了避免Host成为瓶颈，MindSpore通过Shape推导和显存申请、算子Tiling数据计算以及算子下发三级流水优化，实现Host计算和Device计算的掩盖。 ",{"type":17,"tag":34,"props":358,"children":359},{},[360],{"type":17,"tag":34,"props":361,"children":362},{},[363],{"type":23,"value":364},"模型压缩",{"type":23,"value":366},"：",{"type":17,"tag":34,"props":368,"children":369},{},[370],{"type":17,"tag":34,"props":371,"children":372},{},[373],{"type":17,"tag":34,"props":374,"children":375},{},[376],{"type":17,"tag":34,"props":377,"children":378},{},[379],{"type":23,"value":380},"金箍棒",{"type":23,"value":382},"工具，",{"type":17,"tag":34,"props":384,"children":385},{},[386],{"type":17,"tag":34,"props":387,"children":388},{},[389],{"type":17,"tag":34,"props":390,"children":391},{},[392],{"type":17,"tag":34,"props":393,"children":394},{},[395],{"type":23,"value":396},"快速",{"type":23,"value":398},"实现",{"type":17,"tag":34,"props":400,"children":401},{},[402],{"type":17,"tag":34,"props":403,"children":404},{},[405],{"type":17,"tag":34,"props":406,"children":407},{},[408],{"type":17,"tag":34,"props":409,"children":410},{},[411],{"type":23,"value":412},"模型量化",{"type":23,"value":414},"算法及",{"type":17,"tag":34,"props":416,"children":417},{},[418],{"type":17,"tag":34,"props":419,"children":420},{},[421],{"type":17,"tag":34,"props":422,"children":423},{},[424],{"type":17,"tag":34,"props":425,"children":426},{},[427],{"type":23,"value":428},"量化",{"type":23,"value":430},"推理全流程 金箍棒是华为昇思 MindSpore 团队与华为诺亚方舟实验室联合研发的模型压缩工具，依靠 MindSpore Rewrite 模块，为算法开发者屏蔽网络差异和硬件细节，提升算法接入与调优效率，同时提供了可视化、量化损失分析以及Summary 等工具。 我们使用金箍棒通过不同量化方式，来尝试平衡DeepSeek-R1的精度和性能： ",{"type":17,"tag":34,"props":432,"children":433},{},[434],{"type":23,"value":435},"8bit权重量化",{"type":23,"value":437},"：对 DeepSeek-R1 进行8bit 权重量化，使权重显存占用降为1/2，小batch_size场景推理性能提升明显，但大batch_size场景推理性能变差，分析发现是权重量化矩阵乘算子随着batch_size增大性能会下降。 
",{"type":17,"tag":34,"props":439,"children":440},{},[441],{"type":23,"value":442},"SmoothQuant 8bit量化",{"type":23,"value":444},"：为提升大batch_size场景的性能，用SmoothQuant 8bit 全量化，测试发现随batch_size增加，吞吐量线性度良好，但网络量化精度损失仍较大。 ",{"type":17,"tag":34,"props":446,"children":447},{},[448],{"type":23,"value":449},"混合量化",{"type":23,"value":451},"：为降量化精度损失，对精度较敏感的FeedForward层用激活动态量化，损失部分性能提升来提升量化精度，MLA层用Outlier-Suppression+异常值抑制算法替代SmoothQuant进一步提升精度。 经多次尝试，最终以CEval精度损失2分的代价，实现DeepSeek-R1部署显存和算力需求减半。",{"type":17,"tag":196,"props":453,"children":455},{"id":454},"_3算力集群训推平台",[456],{"type":17,"tag":34,"props":457,"children":458},{},[459],{"type":23,"value":460},"3****算力集群训推平台",{"type":17,"tag":25,"props":462,"children":463},{},[464],{"type":23,"value":465},"北京大学科教创新卓越中心自研SCOW算力平台与鹤思算力调度，能够高效纳管大规模的异构算力集群，通过软硬件解耦的分层体系架构，屏蔽底层硬件差异，向下支持各种硬件，向上支持各种框架模型应用。基于openEuler+MindSpore社区的开源集群训推平台，通过高效的算力调度技术实现训推一体，已应用到了全国超过60家单位，并逐渐应用到了卓越中心国产集群。",{"type":17,"tag":140,"props":467,"children":469},{"id":468},"_03高效部署开箱即用",[470],{"type":17,"tag":34,"props":471,"children":472},{},[473],{"type":23,"value":474},"# 03****高效部署开箱即用",{"type":17,"tag":25,"props":476,"children":477},{},[478],{"type":23,"value":479},"2025年3月7日在北京大学鲲鹏昇腾科教创新卓越中心，北大师生和openEuler 与MindSpore 社区开发者共同首次打通了openEuler+MindSpore+DeepSeek全栈开源推理方案的生产环境部署实践。",{"type":17,"tag":25,"props":481,"children":482},{},[483],{"type":17,"tag":191,"props":484,"children":486},{"alt":7,"src":485},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/14/6d40622c5d8f4720b39a2ec82df2f9cf.png",[],{"type":17,"tag":196,"props":488,"children":490},{"id":489},"_1openeuler环境部署",[491],{"type":17,"tag":34,"props":492,"children":493},{},[494],{"type":23,"value":495},"1****openEuler环境部署",{"type":17,"tag":25,"props":497,"children":498},{},[499,501,506],{"type":23,"value":500},"组网结构推荐使用直连模式，即服务器通过交换机直连，确保每张卡都可以ping通其他卡。 ",{"type":17,"tag":34,"props":502,"children":503},{},[504],{"type":23,"value":505},"环境要求：",{"type":23,"value":507}," 两台Atlas 800I A2（8*64G）。 
Ascend HDK Driver 24.1.0版本，Firmware 7.5.0.3.22版本。 openEuler 24.03 LTS版本（内核 5.10）。",{"type":17,"tag":196,"props":509,"children":511},{"id":510},"_2deepseek模型下载",[512],{"type":17,"tag":34,"props":513,"children":514},{},[515],{"type":23,"value":516},"2****DeepSeek模型下载",{"type":17,"tag":25,"props":518,"children":519},{},[520,522,527,529],{"type":23,"value":521},"模型需在各个推理节点上进行部署，请确保模型",{"type":17,"tag":34,"props":523,"children":524},{},[525],{"type":23,"value":526},"位置在各节点上一致",{"type":23,"value":528},"，可从如下地址下载。 魔乐社区： ",{"type":17,"tag":530,"props":531,"children":535},"a",{"href":532,"rel":533},"https://modelers.cn/models/MindSpore-Lab/DeepSeek-R1-W8A8",[534],"nofollow",[536],{"type":23,"value":532},{"type":17,"tag":196,"props":538,"children":540},{"id":539},"_3mindspore一键部署",[541],{"type":17,"tag":34,"props":542,"children":543},{},[544],{"type":23,"value":545},"3****MindSpore一键部署",{"type":17,"tag":25,"props":547,"children":548},{},[549,551],{"type":23,"value":550},"一键式部署脚本推荐在单独控制的节点执行，控制节点需要可以使用SSH访问各个推理节点。 ",{"type":17,"tag":34,"props":552,"children":553},{},[554],{"type":23,"value":555},"Step1：下载oedeploy工具，调整oedeploy配置文件",{"type":17,"tag":557,"props":558,"children":560},"pre",{"code":559},"# 下载插件包并解压\nwget https://repo.oepkgs.net/openEuler/rpm/openEuler-24.03-LTS/contrib/oedp/plugins/mindspore-deepseek.tar.gz\ntar zxvf mindspore-deepseek.tar.gz\n# 按照提示调整mindspore-deepseek目录下config.yaml\n# 下载并安装oedp工具\nwget https://repo.oepkgs.net/openEuler/rpm/openEuler-24.03-LTS/contrib/oedp/aarch64/Packages/oedp-1.0.0-2.oe2503.aarch64.rpm\nyum localinstall oedp-1.0.0-2.oe2503.aarch64.rpm\n",[561],{"type":17,"tag":562,"props":563,"children":564},"code",{"__ignoreMap":7},[565],{"type":23,"value":559},{"type":17,"tag":25,"props":567,"children":568},{},[569],{"type":17,"tag":34,"props":570,"children":571},{},[572],{"type":23,"value":573},"Step2：运行一键部署脚本",{"type":17,"tag":557,"props":575,"children":577},{"code":576},"oedp run install  
#在mindspore-deepseek目录下运行\n",[578],{"type":17,"tag":562,"props":579,"children":580},{"__ignoreMap":7},[581],{"type":23,"value":576},{"type":17,"tag":196,"props":583,"children":585},{"id":584},"_4推理服务测试验证",[586],{"type":17,"tag":34,"props":587,"children":588},{},[589],{"type":23,"value":590},"4****推理服务测试验证",{"type":17,"tag":25,"props":592,"children":593},{},[594],{"type":23,"value":595},"服务拉起后，可使用如下curl指令进行验证",{"type":17,"tag":557,"props":597,"children":599},{"code":598},"curl http://主节点ip:推理服务端口/v1/completions -H \"Content-Type: application/json\" -d '{\"model\": \"模型路径\", \"prompt\": \"I love Beijing, because\", \"max_tokens\": 32, \"temperature\": 0, \"top_p\": 1.0, \"top_k\": 1, \"repetition_penalty\":1.0}'\n",[600],{"type":17,"tag":562,"props":601,"children":602},{"__ignoreMap":7},[603],{"type":23,"value":598},{"type":17,"tag":25,"props":605,"children":606},{},[607],{"type":23,"value":608},"注意:\"模型路径\"请确保和插件目录下config.yaml中的model_path值一致",{"type":17,"tag":25,"props":610,"children":611},{},[612],{"type":17,"tag":34,"props":613,"children":614},{},[615],{"type":23,"value":616},"返回推理结果示例：",{"type":17,"tag":25,"props":618,"children":619},{},[620],{"type":17,"tag":191,"props":621,"children":623},{"alt":7,"src":622},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/14/c417539ec05641a18d4186e50a3b756d.png",[],{"type":17,"tag":25,"props":625,"children":626},{},[627],{"type":23,"value":628},"测试结果：基于DeepSeek-R1 W8A8大模型，2台Atlas 800I A2 64GB服务器，128请求吞吐率超过1198token/s，单请求吞吐率可达16.7token/s。",{"type":17,"tag":140,"props":630,"children":632},{"id":631},"_04开源方案优秀实践",[633],{"type":17,"tag":34,"props":634,"children":635},{},[636],{"type":23,"value":637},"# 
04****开源方案优秀实践",{"type":17,"tag":196,"props":639,"children":641},{"id":640},"_1高效部署开箱即用",[642],{"type":17,"tag":34,"props":643,"children":644},{},[645],{"type":23,"value":646},"1****高效部署，开箱即用",{"type":17,"tag":25,"props":648,"children":649},{},[650],{"type":23,"value":651},"采用MindSpore+openEuler一键部署脚本，自动完成镜像拉取、集群启动、推理服务拉取等技术。相较于传统手动部署时间通常花费2-3小时，基于当前复杂组网现状，本方案端到端部署时间约20分钟。 未来基于精简组网及内存卸载，一键部署可在单台主机上进行DeepSeek满血部署，部署效率进一步提升。",{"type":17,"tag":196,"props":653,"children":655},{"id":654},"_2全栈开放生态共建",[656],{"type":17,"tag":34,"props":657,"children":658},{},[659],{"type":23,"value":660},"2****全栈开放，生态共建",{"type":17,"tag":25,"props":662,"children":663},{},[664],{"type":23,"value":665},"本方案整合了DeepSeek、openEuler、MindSpore与vLLM/RAY等社区开源组件，用户可以轻松获取源码，根据需求进行二次开发。联合社区进行重大特性开发与生态共建，企业开发成本降低。",{"type":17,"tag":25,"props":667,"children":668},{},[669],{"type":23,"value":670},"北大科教创新卓越中心根据自身实际情况开发算力集群训推平台，并贡献反馈开源社区，取得模型部署易用性的进一步提升。",{"type":17,"tag":196,"props":672,"children":674},{"id":673},"_3按需量化动态适配",[675],{"type":17,"tag":34,"props":676,"children":677},{},[678],{"type":23,"value":679},"3****按需量化，动态适配",{"type":17,"tag":25,"props":681,"children":682},{},[683],{"type":23,"value":684},"压缩模型工具（金箍棒）针对开源大模型，根据用户及硬件能力可进行适当定制量化，本方案使用8bit权重量化、SmoothQuant 
8bit量化和混合量化等技术，最终以CEval精度损失2分的代价，实现了DeepSeek-R1 W8A8的大模型部署。",{"type":17,"tag":25,"props":686,"children":687},{},[688],{"type":17,"tag":191,"props":689,"children":691},{"alt":7,"src":690},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/03/14/8aff7f3012f748a9b277b59bb5fe71ea.png",[],{"title":7,"searchDepth":693,"depth":693,"links":694},4,[695,700,706,712],{"id":142,"depth":696,"text":148,"children":697},3,[698,699],{"id":198,"depth":693,"text":204},{"id":217,"depth":693,"text":223},{"id":236,"depth":696,"text":242,"children":701},[702,703,705],{"id":250,"depth":693,"text":256},{"id":316,"depth":693,"text":704},"2MindSpore图编译：将模型编译为计算图，通过模式匹配自动将小算子融为大算子",{"id":454,"depth":693,"text":460},{"id":468,"depth":696,"text":474,"children":707},[708,709,710,711],{"id":489,"depth":693,"text":495},{"id":510,"depth":693,"text":516},{"id":539,"depth":693,"text":545},{"id":584,"depth":693,"text":590},{"id":631,"depth":696,"text":637,"children":713},[714,715,716],{"id":640,"depth":693,"text":646},{"id":654,"depth":693,"text":660},{"id":673,"depth":693,"text":679},"markdown","content:news:zh:3639.md","content","news/zh/3639.md","news/zh/3639","md",1776506086335]