[{"data":1,"prerenderedAt":221},["ShallowReactive",2],{"content-query-zkc3XqYBna":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":215,"_id":216,"_source":217,"_file":218,"_stem":219,"_extension":220},"/news/zh/3743","zh",false,"","MindSpore成立LLM Inference Serving SIG：提供高性能、易用的MindSpore大模型推理部署能力","为了将昇思MindSpore大模型推理能力接入vLLM、SGLang等上游开源推理服务框架，整合上下游社区的技术优势，提供高性能、易用的大模型推理服务能力，昇思MindSpore社区正式成立MindSpore大模型推理服务SIG（LLM Inference Serving SIG）。","2025-05-29","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/05/30/fab7abe087b8412abcd399e267855345.png","news",{"type":14,"children":15,"toc":212},"root",[16,24,30,35,40,45,56,61,70,75,84,93,98,103,108,116,121,126,135,140,149,154,163,168,176,184,192,200,207],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"mindspore成立llm-inference-serving-sig提供高性能易用的mindspore大模型推理部署能力",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"为了将昇思MindSpore大模型推理能力接入vLLM、SGLang等上游开源推理服务框架，整合上下游社区的技术优势，提供高性能、易用的大模型推理服务能力，昇思MindSpore社区正式成立MindSpore大模型推理服务SIG（LLM Inference Serving SIG）。LLM Inference Serving SIG已孵化了开源插件vLLM-MindSpore，可支持vLLM框架以MindSpore为推理计算底座，实现大模型推理服务化部署。",{"type":17,"tag":25,"props":31,"children":32},{},[33],{"type":23,"value":34},"vLLM-MindSpore是一款开源vLLM插件，其设计理念是将MindSpore推理大模型注册到vLLM中，而不修改vLLM的用户接口和服务化能力。最终，实现MindSpore框架的图融合、算子并行下发等推理计算加速能力，与vLLM的Continuous Batching、Prefix Caching等推理服务加速能力的有机结合。",{"type":17,"tag":25,"props":36,"children":37},{},[38],{"type":23,"value":39},"2025年1月项目启动以来，vLLM-MindSpore已迭代适配vLLM v0.6.6.post1版本和v0.7.3版本的基础功能。vLLM-MindSpore的首个正式版本计划于6月下旬随MindSpore 2.7.0-rc1发布，适配vLLM 
v0.8.3版本和V1架构。",{"type":17,"tag":25,"props":41,"children":42},{},[43],{"type":23,"value":44},"vLLM-MindSpore项目代码仓：",{"type":17,"tag":25,"props":46,"children":47},{},[48],{"type":17,"tag":49,"props":50,"children":54},"a",{"href":51,"rel":52},"http://gitee.com/mindspore/vllm-mindspore",[53],"nofollow",[55],{"type":23,"value":51},{"type":17,"tag":25,"props":57,"children":58},{},[59],{"type":23,"value":60},"Qwen3-32B体验示例：",{"type":17,"tag":25,"props":62,"children":63},{},[64],{"type":17,"tag":49,"props":65,"children":68},{"href":66,"rel":67},"https://modelers.cn/models/MindSpore-Lab/Qwen3-32B",[53],[69],{"type":23,"value":66},{"type":17,"tag":25,"props":71,"children":72},{},[73],{"type":23,"value":74},"DeepSeek-R1体验示例：",{"type":17,"tag":25,"props":76,"children":77},{},[78],{"type":17,"tag":49,"props":79,"children":82},{"href":80,"rel":81},"https://modelers.cn/models/MindSpore-Lab/DeepSeek-R1-W8A8",[53],[83],{"type":23,"value":80},{"type":17,"tag":25,"props":85,"children":86},{},[87],{"type":17,"tag":88,"props":89,"children":90},"strong",{},[91],{"type":23,"value":92},"SIG目标：",{"type":17,"tag":25,"props":94,"children":95},{},[96],{"type":23,"value":97},"1、构建和优化MindSpore大模型推理服务能力，实现MindSpore大模型推理与vLLM、SGLang等上游开源推理服务框架的无缝对接。",{"type":17,"tag":25,"props":99,"children":100},{},[101],{"type":23,"value":102},"2、开发和优化vLLM-MindSpore插件，持续适配vLLM最新稳定版本和SOTA大模型，最终实现与上游vLLM项目的同步演进。",{"type":17,"tag":25,"props":104,"children":105},{},[106],{"type":23,"value":107},"3、孵化SGLang-MindSpore插件（进行中，计划基础功能完备后开源）。",{"type":17,"tag":25,"props":109,"children":110},{},[111],{"type":17,"tag":88,"props":112,"children":113},{},[114],{"type":23,"value":115},"SIG例会：",{"type":17,"tag":25,"props":117,"children":118},{},[119],{"type":23,"value":120},"双周例会，北京时间周三晚19点。",{"type":17,"tag":25,"props":122,"children":123},{},[124],{"type":23,"value":125},"首次例会录屏：",{"type":17,"tag":25,"props":127,"children":128},{},[129],{"type":17,"tag":49,"props":130,"children":133},{"href":131,"rel":132},"https://www.bilibili
.com/video/BV1GS7GzJEaR/",[53],[134],{"type":23,"value":131},{"type":17,"tag":25,"props":136,"children":137},{},[138],{"type":23,"value":139},"例会纪要：",{"type":17,"tag":25,"props":141,"children":142},{},[143],{"type":17,"tag":49,"props":144,"children":147},{"href":145,"rel":146},"https://etherpad.mindspore.cn/p/sig-LLM-Inference-Serving-meetings",[53],[148],{"type":23,"value":145},{"type":17,"tag":25,"props":150,"children":151},{},[152],{"type":23,"value":153},"订阅邮件：",{"type":17,"tag":25,"props":155,"children":156},{},[157],{"type":17,"tag":49,"props":158,"children":161},{"href":159,"rel":160},"https://www.mindspore.cn/sig/LLM%20Inference%20Serving",[53],[162],{"type":23,"value":159},{"type":17,"tag":25,"props":164,"children":165},{},[166],{"type":23,"value":167},"（欢迎大家订阅SIG邮件，后续例会将通过邮件通知大家）",{"type":17,"tag":25,"props":169,"children":170},{},[171],{"type":17,"tag":88,"props":172,"children":173},{},[174],{"type":23,"value":175},"SIG成员：",{"type":17,"tag":25,"props":177,"children":178},{},[179],{"type":17,"tag":88,"props":180,"children":181},{},[182],{"type":23,"value":183},"Maintainer 列表",{"type":17,"tag":25,"props":185,"children":186},{},[187],{"type":17,"tag":188,"props":189,"children":191},"img",{"alt":7,"src":190},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/05/30/e77605ac7ad34daaaa043c2cc2c1b792.png",[],{"type":17,"tag":25,"props":193,"children":194},{},[195],{"type":17,"tag":88,"props":196,"children":197},{},[198],{"type":23,"value":199},"Committer 列表",{"type":17,"tag":25,"props":201,"children":202},{},[203],{"type":17,"tag":188,"props":204,"children":206},{"alt":7,"src":205},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/05/30/efbf06bd80db4ee88521d0a22969777d.png",[],{"type":17,"tag":25,"props":208,"children":209},{},[210],{"type":23,"value":211},"欢迎感兴趣的朋友们加入到LLM Inference Serving 
SIG，共同探讨大模型推理部署技术。",{"title":7,"searchDepth":213,"depth":213,"links":214},4,[],"markdown","content:news:zh:3743.md","content","news/zh/3743.md","news/zh/3743","md",1776506088661]