[{"data":1,"prerenderedAt":329},["ShallowReactive",2],{"content-query-sIUuNTBviw":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":323,"_id":324,"_source":325,"_file":326,"_stem":327,"_extension":328},"/technology-blogs/zh/2026-1-30","zh",false,"","SGLang正式支持昇思MindSpore，使能昇腾大模型服务化高性能","让开发者可以基于昇思MindSpore体验低延迟、高吞吐的SGLang推理能力","2026-1-30","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2024/11/28/8e0e0150508a4c5ba4287fa3bec8ea3f.png","technology-blogs","技术解读",{"type":15,"children":16,"toc":320},"root",[17,26,43,49,54,59,70,75,82,88,93,98,103,108,118,123,134,143,148,156,168,176,181,189,194,203,208,215,220,225,233,238,246,258,264,269,274,283,288,297,308,313],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"_01-介绍",[23],{"type":24,"value":25},"text","01 介绍",{"type":18,"tag":27,"props":28,"children":29},"p",{},[30,32,41],{"type":24,"value":31},"SGLang是一个以高性能著称的开源大模型服务化引擎，以其零开销的CPU调度和基于基数树 (Radix Tree) 的高命中率KV Cache为特色，性能位于业界领先水准。2025年11月，经过SGLang与昇思MindSpore社区的共同努力，昇思MindSpore社区提交的PR（",{"type":18,"tag":33,"props":34,"children":38},"a",{"href":35,"rel":36},"https://github.com/sgl-project/sglang/pull/9234",[37],"nofollow",[39],{"type":24,"value":40},"https://github.com/sgl-project/sglang/pull/9234",{"type":24,"value":42},"）成功合入SGLang仓库。SGLang 0.5.6以后的版本中将包含MindSpore支持。用户可以在SGLang框架里无缝运行MindSpore模型，体验昇腾平台的高性能推理服务化。",{"type":18,"tag":19,"props":44,"children":46},{"id":45},"_02-技术方案",[47],{"type":24,"value":48},"02 技术方案",{"type":18,"tag":27,"props":50,"children":51},{},[52],{"type":24,"value":53},"SGLang-MindSpore方案构建于SGLang对昇腾平台的既有支持之上。在前端语言、数据并行控制、调度器和KV 
Cache管理层面，复用SGLang既有的实现；在模型执行层面，将MindSpore模型统一封装到MindSporeForCausalLM类，替换SGLang原生模型，方案简洁，侵入式改动少。同时，通过张量和HCCL通信组的复用，打通了MindSpore-PyTorch的双框架协同运行的能力而几乎不产生资源浪费。这种双框架协同的模式使得SGLang的许多特性只需要较简单的接口适配就可以应用于MindSpore模型，如数据并行、PD分离、投机解码等。",{"type":18,"tag":27,"props":55,"children":56},{},[57],{"type":24,"value":58},"SGLang-MindSpore的架构框图如下所示。其中，蓝色部分表示SGLang原生组件，粉红色部分表示MindSpore新增组件。",{"type":18,"tag":60,"props":61,"children":63},"div",{"style":62},"text-align: center;",[64],{"type":18,"tag":65,"props":66,"children":69},"img",{"src":67,"style":68,"alt":7},"/category/information/technology-blogs/banner/2026-1-30/1.jpg","display: block;margin: 0 auto;max-width:70%",[],{"type":18,"tag":27,"props":71,"children":72},{},[73],{"type":24,"value":74},"SGLang-MindSpore的推理流程图如下所示。模型输入和KV Cache等PyTorch张量，通过DLPack转换为MindSpore Tensor。而MindSpore模型输出的Logits，也通过DLPack转换为PyTorch Tensor，再执行Sample, Detokenize等后处理流程。该过程不涉及额外的显存分配或数据搬运，从而让MindSpore模型无缝衔接到SGLang原生流程中。",{"type":18,"tag":60,"props":76,"children":77},{"style":62},[78],{"type":18,"tag":65,"props":79,"children":81},{"src":80,"style":68,"alt":7},"/category/information/technology-blogs/banner/2026-1-30/2.jpg",[],{"type":18,"tag":19,"props":83,"children":85},{"id":84},"_03-快速上手-基于昇思mindspore体验sglang推理",[86],{"type":24,"value":87},"03 快速上手-基于昇思MindSpore体验SGLang推理",{"type":18,"tag":27,"props":89,"children":90},{},[91],{"type":24,"value":92},"1、安装指引",{"type":18,"tag":27,"props":94,"children":95},{},[96],{"type":24,"value":97},"我们提供conda和Docker两种安装方式。",{"type":18,"tag":27,"props":99,"children":100},{},[101],{"type":24,"value":102},"方式一：基于conda环境安装",{"type":18,"tag":27,"props":104,"children":105},{},[106],{"type":24,"value":107},"1）创建Python 3.11 的conda环境。",{"type":18,"tag":109,"props":110,"children":112},"pre",{"code":111},"conda create -n [环境名] python=3.11\nconda activate 
[环境名]\n",[113],{"type":18,"tag":114,"props":115,"children":116},{"__ignoreMap":7},[117],{"type":24,"value":111},{"type":18,"tag":27,"props":119,"children":120},{},[121],{"type":24,"value":122},"2）将NPU驱动更新至25.5：",{"type":18,"tag":27,"props":124,"children":125},{},[126,132],{"type":18,"tag":33,"props":127,"children":130},{"href":128,"rel":129},"https://www.hiascend.com/hardware/firmware-drivers/community",[37],[131],{"type":24,"value":128},{"type":24,"value":133},"\n并安装社区版CANN 8.5：",{"type":18,"tag":27,"props":135,"children":136},{},[137],{"type":18,"tag":33,"props":138,"children":141},{"href":139,"rel":140},"https://www.hiascend.com/cann/download",[37],[142],{"type":24,"value":139},{"type":18,"tag":27,"props":144,"children":145},{},[146],{"type":24,"value":147},"3）安装基于昇腾平台的SGLang。",{"type":18,"tag":109,"props":149,"children":151},{"code":150},"git clone https://github.com/sgl-project/sglang.git\ncd sglang\ncp python/pyproject_other.toml python/pyproject.toml\npip install -e \"python[all_npu]\"\n",[152],{"type":18,"tag":114,"props":153,"children":154},{"__ignoreMap":7},[155],{"type":24,"value":150},{"type":18,"tag":27,"props":157,"children":158},{},[159,161],{"type":24,"value":160},"4）安装sgl-kernel-npu。请在",{"type":18,"tag":33,"props":162,"children":165},{"href":163,"rel":164},"https://github.com/sgl-project/sgl-kernel-npu/releases",[37],[166],{"type":24,"value":167},"https://github.com/sgl-project/sgl-kernel-npu/releases根据硬件平台下载对应的压缩包并解压安装，以昇腾910为例：",{"type":18,"tag":109,"props":169,"children":171},{"code":170},"wget https://github.com/sgl-project/sgl-kernel-npu/releases/download/2026.01.21/sgl-kernel-npu_2026.01.21_8.5.0_910b.zip -O tmp.zip && unzip tmp.zip && rm -f tmp.zip\npip install 
output/*.whl\n",[172],{"type":18,"tag":114,"props":173,"children":174},{"__ignoreMap":7},[175],{"type":24,"value":170},{"type":18,"tag":27,"props":177,"children":178},{},[179],{"type":24,"value":180},"5）安装MindSpore模型仓库，该流程会自动安装MindSpore 2.8版本。",{"type":18,"tag":109,"props":182,"children":184},{"code":183},"git clone https://github.com/mindspore-lab/sgl-mindspore.git\ncd sgl-mindspore\npip install -e .\n",[185],{"type":18,"tag":114,"props":186,"children":187},{"__ignoreMap":7},[188],{"type":24,"value":183},{"type":18,"tag":27,"props":190,"children":191},{},[192],{"type":24,"value":193},"方式二：基于SGLang官方Dockerfile构建docker镜像:",{"type":18,"tag":27,"props":195,"children":196},{},[197],{"type":18,"tag":33,"props":198,"children":201},{"href":199,"rel":200},"https://github.com/sgl-project/sglang/blob/main/docker/npu.Dockerfile",[37],[202],{"type":24,"value":199},{"type":18,"tag":27,"props":204,"children":205},{},[206],{"type":24,"value":207},"构建成功后，在容器内安装最新版MindSpore和模型仓库：",{"type":18,"tag":109,"props":209,"children":210},{"code":183},[211],{"type":18,"tag":114,"props":212,"children":213},{"__ignoreMap":7},[214],{"type":24,"value":183},{"type":18,"tag":27,"props":216,"children":217},{},[218],{"type":24,"value":219},"2、体验推理流程",{"type":18,"tag":27,"props":221,"children":222},{},[223],{"type":24,"value":224},"安装完成后，请设置以下环境变量：",{"type":18,"tag":109,"props":226,"children":228},{"code":227},"export ASCEND_RT_VISIBLE_DEVICES=0 # 指定使用几号卡\nexport PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python # 避免protobuf版本不匹配\n",[229],{"type":18,"tag":114,"props":230,"children":231},{"__ignoreMap":7},[232],{"type":24,"value":227},{"type":18,"tag":27,"props":234,"children":235},{},[236],{"type":24,"value":237},"之后可运行以下脚本，体验离线推理流程。",{"type":18,"tag":109,"props":239,"children":241},{"code":240},"import sglang as sgl\n \nllm = sgl.Engine(\n    model_path=\"/path/to/your/model\",  # 模型权重路径，例如Qwen3-8B\n    device=\"npu\",\n    model_impl=\"mindspore\", # 使用MindSpore模型\n    attention_backend=\"ascend\",\n   
 tp_size=1, # 张量并行尺寸, 根据模型大小和单卡内存决定\n    dp_size=1,  # 数据并行尺寸，需要保证tp_size * dp_size \u003C= 卡数\n    mem_fraction_static=0.8, # 给SGLang预留的NPU内存比例\n)\n \n# 提示词，可自行修改\nprompts = [\n    \"Hello, my name is\",\n    \"The capital of France is\",\n    \"The future of AI is\"\n]\n \nsampling_params = {\"temperature\": 0, \"top_p\": 0.9}\noutputs = llm.generate(prompts, sampling_params)\n \nfor prompt, output in zip(prompts, outputs):\n    print(f\"Prompt: {prompt}\")\n    print(f\"Generated: {output['text']}\")\n    print(\"---\")\n",[242],{"type":18,"tag":114,"props":243,"children":244},{"__ignoreMap":7},[245],{"type":24,"value":240},{"type":18,"tag":27,"props":247,"children":248},{},[249,251],{"type":24,"value":250},"更多使用指导，包括服务化推理、性能测试等，请参考SGLang官方文档（",{"type":18,"tag":33,"props":252,"children":255},{"href":253,"rel":254},"https://docs.sglang.io/supported_models/mindspore_models.html",[37],[256],{"type":24,"value":257},"https://docs.sglang.io/supported_models/mindspore_models.html）以及sgl-mindspore模型仓库文档（https://github.com/mindspore-lab/sgl-mindspore/blob/main/README_zh.md）。",{"type":18,"tag":19,"props":259,"children":261},{"id":260},"_04-结语",[262],{"type":24,"value":263},"04 结语",{"type":18,"tag":27,"props":265,"children":266},{},[267],{"type":24,"value":268},"关于SGLang支持昇思MindSpore的更多链接，请参考",{"type":18,"tag":27,"props":270,"children":271},{},[272],{"type":24,"value":273},"SGLang 
0.5.6发行版，包含MindSpore支持：",{"type":18,"tag":27,"props":275,"children":276},{},[277],{"type":18,"tag":33,"props":278,"children":281},{"href":279,"rel":280},"https://github.com/sgl-project/sglang/releases/tag/v0.5.6",[37],[282],{"type":24,"value":279},{"type":18,"tag":27,"props":284,"children":285},{},[286],{"type":24,"value":287},"SGLang官方文档中的MindSpore模型部分：",{"type":18,"tag":27,"props":289,"children":290},{},[291],{"type":18,"tag":33,"props":292,"children":295},{"href":293,"rel":294},"https://docs.sglang.io/supported_models/mindspore_models.html",[37],[296],{"type":24,"value":293},{"type":18,"tag":27,"props":298,"children":299},{},[300,302],{"type":24,"value":301},"MindSpore模型仓库：\n",{"type":18,"tag":33,"props":303,"children":306},{"href":304,"rel":305},"https://github.com/mindspore-lab/sgl-mindspore",[37],[307],{"type":24,"value":304},{"type":18,"tag":27,"props":309,"children":310},{},[311],{"type":24,"value":312},"了解和讨论SGLang-MindSpore及大模型推理技术，欢迎大家加入大模型推理服务SIG群交流。",{"type":18,"tag":60,"props":314,"children":315},{"style":62},[316],{"type":18,"tag":65,"props":317,"children":319},{"src":318,"style":68,"alt":7},"/category/information/technology-blogs/banner/2026-1-30/3.jpg",[],{"title":7,"searchDepth":321,"depth":321,"links":322},4,[],"markdown","content:technology-blogs:zh:2026-1-30.md","content","technology-blogs/zh/2026-1-30.md","technology-blogs/zh/2026-1-30","md",1776506118763]