[{"data":1,"prerenderedAt":566},["ShallowReactive",2],{"content-query-Zj9LtF9XZp":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":560,"_id":561,"_source":562,"_file":563,"_stem":564,"_extension":565},"/technology-blogs/zh/3831","zh",false,"","昇思MindSpore基于昇腾硬件快速支持Qwen3-Next-80B-A3B系列模型，内附教程4步实现推理！","阿里云发布语言大模型，昇思MindSpore AI框架在模型开源当日即实现快速支持","2025-09-13","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/09/19/2d18c030094e4b62913209db9f84b29a.png","technology-blogs",{"type":14,"children":15,"toc":557},"root",[16,24,30,35,46,51,60,69,74,79,84,101,116,126,137,142,152,157,165,173,184,189,197,205,213,218,226,234,239,247,255,265,276,284,289,297,305,310,318,326,331,339,354,362,371,379,384,392,397,404,412,419,424,431,438,445,449,456,463,468,476,483,490,500,507,512,519,526,530,538,545,549],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"昇思mindspore基于昇腾硬件快速支持qwen3-next-80b-a3b系列模型内附教程4步实现推理",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"9月11日，阿里云发布语言大模型Qwen3-Next-80B-A3B-Instruct和Qwen3-Next-80B-A3B-Thinking，昇思MindSpore AI框架在模型开源当日即实现快速支持。昇思MindSpore基于昇腾硬件通过提供主流生态接口，依托MindSpore Transformers套件，快速完成模型无缝迁移，并上传至开源社区，欢迎广大开发者下载体验！",{"type":17,"tag":25,"props":31,"children":32},{},[33],{"type":23,"value":34},"昇思开源社区MindSpore 
Transformers代码仓：",{"type":17,"tag":25,"props":36,"children":37},{},[38],{"type":17,"tag":39,"props":40,"children":44},"a",{"href":41,"rel":42},"https://gitee.com/mindspore/mindformers",[43],"nofollow",[45],{"type":23,"value":41},{"type":17,"tag":25,"props":47,"children":48},{},[49],{"type":23,"value":50},"魔乐社区vLLM-MindSpore推理版地址：",{"type":17,"tag":25,"props":52,"children":53},{},[54],{"type":17,"tag":39,"props":55,"children":58},{"href":56,"rel":57},"https://modelers.cn/models/MindSpore-Lab/Qwen3-Next-80B-A3B-Thinking",[43],[59],{"type":23,"value":56},{"type":17,"tag":25,"props":61,"children":62},{},[63],{"type":17,"tag":39,"props":64,"children":67},{"href":65,"rel":66},"https://modelers.cn/models/MindSpore-Lab/Qwen3-Next-80B-A3B-Instruct",[43],[68],{"type":23,"value":65},{"type":17,"tag":25,"props":70,"children":71},{},[72],{"type":23,"value":73},"Qwen3-Next，该系列定位为“下一代基础模型”，主打极端上下文长度与参数效率。",{"type":17,"tag":25,"props":75,"children":76},{},[77],{"type":23,"value":78},"架构层面引入了三项核心创新。首先是Hybrid Attention，它使用Gated DeltaNet和Gated Attention替代传统注意力机制，以实现高效的长文本建模。其次是High-Sparsity MoE，将激活比例压缩至1:50，大幅减少了单个token的FLOPs而不损失模型容量。最后是Multi-Token Prediction，在预训练阶段同步预测多个token，从而提升性能并加速推理。此外，模型还辅以zero-centered、weight-decayed layernorm等多项稳定化改进，增强了训练的鲁棒性。",{"type":17,"tag":25,"props":80,"children":81},{},[82],{"type":23,"value":83},"基于这些设计，团队训练并计划开源Qwen3-Next-80B-A3B模型。该模型总参数量为80B，但激活参数仅3B，其训练成本不到Qwen3-32B的十分之一，而在下游任务上的表现却更胜一筹。特别是在32K以上长上下文场景中，其推理吞吐量相比32B基线模型提升超过10倍。",{"type":17,"tag":18,"props":85,"children":87},{"id":86},"_01-qwen3-next-80b-a3b-thinking-vllm-mindspore-推理指南",[88,94,96],{"type":17,"tag":89,"props":90,"children":91},"strong",{},[92],{"type":23,"value":93},"# 01",{"type":23,"value":95}," ",{"type":17,"tag":89,"props":97,"children":98},{},[99],{"type":23,"value":100},"Qwen3-Next-80B-A3B-Thinking vllm-mindspore 
推理指南",{"type":17,"tag":25,"props":102,"children":103},{},[104],{"type":17,"tag":89,"props":105,"children":106},{},[107,109,114],{"type":23,"value":108},"1、",{"type":17,"tag":89,"props":110,"children":111},{},[112],{"type":23,"value":113},"下载链",{"type":23,"value":115},"接",{"type":17,"tag":25,"props":117,"children":118},{},[119,121],{"type":23,"value":120},"魔乐社区：",{"type":17,"tag":39,"props":122,"children":124},{"href":56,"rel":123},[43],[125],{"type":23,"value":56},{"type":17,"tag":25,"props":127,"children":128},{},[129],{"type":17,"tag":89,"props":130,"children":131},{},[132],{"type":17,"tag":89,"props":133,"children":134},{},[135],{"type":23,"value":136},"2、模型下载。",{"type":17,"tag":25,"props":138,"children":139},{},[140],{"type":23,"value":141},"执行以下命令为自定义下载路径`/mnt/data/Qwen3-Next-80B-A3B-Thinking` 添加白名单。",{"type":17,"tag":143,"props":144,"children":146},"pre",{"code":145},"export HUB_WHITE_LIST_PATHS=/mnt/data/Qwen3-Next-80B-A3B-Thinking\n",[147],{"type":17,"tag":148,"props":149,"children":150},"code",{"__ignoreMap":7},[151],{"type":23,"value":145},{"type":17,"tag":25,"props":153,"children":154},{},[155],{"type":23,"value":156},"执行以下命令从魔乐社区下载Qwen3-Next-80B-A3B-Thinking 权重文件至指定路径`/mnt/data/Qwen3-Next-80B-A3B-Thinking` 。下载的文件包含模型代码、权重、分词模型和示例代码，占用约 152GB 的磁盘空间，请预留足够空间。",{"type":17,"tag":143,"props":158,"children":160},{"code":159},"pip install openmind_hub\n",[161],{"type":17,"tag":148,"props":162,"children":163},{"__ignoreMap":7},[164],{"type":23,"value":159},{"type":17,"tag":143,"props":166,"children":168},{"code":167},"python\nfrom openmind_hub import snapshot_download\nsnapshot_download(\n   repo_id=\"MindSpore-Lab/Qwen3-Next-80B-A3B-Thinking\",\n   local_dir=\"/mnt/data/Qwen3-Next-80B-A3B-Thinking\",\n   
local_dir_use_symlinks=False\n)\nexit()\n",[169],{"type":17,"tag":148,"props":170,"children":171},{"__ignoreMap":7},[172],{"type":23,"value":167},{"type":17,"tag":25,"props":174,"children":175},{},[176],{"type":17,"tag":89,"props":177,"children":178},{},[179],{"type":17,"tag":89,"props":180,"children":181},{},[182],{"type":23,"value":183},"3、快速开始",{"type":17,"tag":25,"props":185,"children":186},{},[187],{"type":23,"value":188},"Qwen3-Next-80B-A3B-Thinking推理需要1台（8卡）Atlas 800T/800I A2（64G）服务器（基于BF16权重）。昇思MindSpore提供了Qwen3-Next-80B-A3B-Thinking推理可用的Docker容器镜像，供开发者快速体验。",{"type":17,"tag":25,"props":190,"children":191},{},[192],{"type":17,"tag":89,"props":193,"children":194},{},[195],{"type":23,"value":196},"3.1 停止其他进程，避免服务器中其他进程影响",{"type":17,"tag":143,"props":198,"children":200},{"code":199},"pkill -9 python\npkill -9 mindie\npkill -9 ray\n",[201],{"type":17,"tag":148,"props":202,"children":203},{"__ignoreMap":7},[204],{"type":23,"value":199},{"type":17,"tag":25,"props":206,"children":207},{},[208],{"type":17,"tag":89,"props":209,"children":210},{},[211],{"type":23,"value":212},"3.2 下载昇思 MindSpore 推理容器镜像",{"type":17,"tag":25,"props":214,"children":215},{},[216],{"type":23,"value":217},"执行以下 Shell 命令，拉取推理容器镜像：",{"type":17,"tag":143,"props":219,"children":221},{"code":220},"docker pull swr.cn-central-221.ovaijisuan.com/mindformers/qwen3-next-80b-a3b:20250911\n",[222],{"type":17,"tag":148,"props":223,"children":224},{"__ignoreMap":7},[225],{"type":23,"value":220},{"type":17,"tag":25,"props":227,"children":228},{},[229],{"type":17,"tag":89,"props":230,"children":231},{},[232],{"type":23,"value":233},"3.3 启动容器",{"type":17,"tag":25,"props":235,"children":236},{},[237],{"type":23,"value":238},"执行以下命令创建并启动容器（/mnt/data/Qwen3-Next-80B-A3B-Thinking用于存放权重路径，若没有/mnt盘则要修改）。",{"type":17,"tag":143,"props":240,"children":242},{"code":241},"docker run -it \\\n--privileged \\\n--name=Qwen3-Next-80B-A3B-Thinking \\\n--net=host \\\n--cap-add=SYS_PTRACE \\\n--security-opt 
seccomp=unconfined \\\n--device=/dev/davinci0 \\\n--device=/dev/davinci1 \\\n--device=/dev/davinci2 \\\n--device=/dev/davinci3 \\\n--device=/dev/davinci4 \\\n--device=/dev/davinci5 \\\n--device=/dev/davinci6 \\\n--device=/dev/davinci7 \\\n--device=/dev/davinci_manager \\\n--device=/dev/hisi_hdc \\\n--device=/dev/devmm_svm \\\n--device=/dev/davinci_manager \\\n-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \\\n-v /usr/local/Ascend/driver/:/usr/local/Ascend/driver/ \\\n-v /usr/local/sbin:/usr/local/sbin \\\n-v /etc/hccn.conf:/etc/hccn.conf \\\n-v /mnt/data/Qwen3-Next-80B-A3B-Thinking/:/mnt/data/Qwen3-Next-80B-A3B-Thinking/ \\\nswr.cn-central-221.ovaijisuan.com/mindformers/qwen3-next-80b-a3b:20250911 \\\n/bin/bash\n",[243],{"type":17,"tag":148,"props":244,"children":245},{"__ignoreMap":7},[246],{"type":23,"value":241},{"type":17,"tag":25,"props":248,"children":249},{},[250],{"type":17,"tag":89,"props":251,"children":252},{},[253],{"type":23,"value":254},"注意事项：",{"type":17,"tag":256,"props":257,"children":258},"ul",{},[259],{"type":17,"tag":260,"props":261,"children":262},"li",{},[263],{"type":23,"value":264},"后续操作，除了发起推理请求可以在容器外进行，其余操作均在容器内进行。",{"type":17,"tag":25,"props":266,"children":267},{},[268],{"type":17,"tag":89,"props":269,"children":270},{},[271],{"type":17,"tag":89,"props":272,"children":273},{},[274],{"type":23,"value":275},"4、服务化部署",{"type":17,"tag":25,"props":277,"children":278},{},[279],{"type":17,"tag":89,"props":280,"children":281},{},[282],{"type":23,"value":283},"4.1 添加环境变量",{"type":17,"tag":25,"props":285,"children":286},{},[287],{"type":23,"value":288},"在服务器中添加如下环境变量：",{"type":17,"tag":143,"props":290,"children":292},{"code":291},"export vLLM_MODEL_BACKEND=MindFormers\nexport 
MS_ENABLE_TRACE_MEMORY=off\n",[293],{"type":17,"tag":148,"props":294,"children":295},{"__ignoreMap":7},[296],{"type":23,"value":291},{"type":17,"tag":25,"props":298,"children":299},{},[300],{"type":17,"tag":89,"props":301,"children":302},{},[303],{"type":23,"value":304},"4.2 拉起服务",{"type":17,"tag":25,"props":306,"children":307},{},[308],{"type":23,"value":309},"执行以下命令拉起服务：",{"type":17,"tag":143,"props":311,"children":313},{"code":312},"python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model \"/mnt/data/Qwen3-Next-80B-A3B-Thinking\" --trust_remote_code --tensor_parallel_size=8 --max-num-seqs=192 --max_model_len=32768 --max-num-batched-tokens=16384 --block-size=32 --gpu-memory-utilization=0.9\n",[314],{"type":17,"tag":148,"props":315,"children":316},{"__ignoreMap":7},[317],{"type":23,"value":312},{"type":17,"tag":25,"props":319,"children":320},{},[321],{"type":17,"tag":89,"props":322,"children":323},{},[324],{"type":23,"value":325},"4.3 执行推理请求测试",{"type":17,"tag":25,"props":327,"children":328},{},[329],{"type":23,"value":330},"打开新的窗口，执行以下命令发送推理请求测试。",{"type":17,"tag":143,"props":332,"children":334},{"code":333},"curl http://localhost:8000/v1/chat/completions -H \"Content-Type: application/json\" -d '{\n  \"model\": \"/mnt/data/Qwen3-Next-80B-A3B-Thinking\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"介绍一下上海\"}\n  ],\n  \"temperature\": 0.6,\n  \"top_p\": 0.95,\n  \"top_k\": 20,\n  \"min_p\": 0,\n  \"max_tokens\": 128,\n  \"presence_penalty\": 1.05\n}'\n",[335],{"type":17,"tag":148,"props":336,"children":337},{"__ignoreMap":7},[338],{"type":23,"value":333},{"type":17,"tag":18,"props":340,"children":342},{"id":341},"_02-qwen3-next-80b-a3b-instruct-vllm-mindspore-推理指南",[343,348,349],{"type":17,"tag":89,"props":344,"children":345},{},[346],{"type":23,"value":347},"# 02",{"type":23,"value":95},{"type":17,"tag":89,"props":350,"children":351},{},[352],{"type":23,"value":353},"Qwen3-Next-80B-A3B-Instruct vllm-mindspore 
推理指南",{"type":17,"tag":25,"props":355,"children":356},{},[357],{"type":17,"tag":89,"props":358,"children":359},{},[360],{"type":23,"value":361},"1、下载链接",{"type":17,"tag":25,"props":363,"children":364},{},[365,366],{"type":23,"value":120},{"type":17,"tag":39,"props":367,"children":369},{"href":65,"rel":368},[43],[370],{"type":23,"value":65},{"type":17,"tag":25,"props":372,"children":373},{},[374],{"type":17,"tag":89,"props":375,"children":376},{},[377],{"type":23,"value":378},"2、模型下载",{"type":17,"tag":25,"props":380,"children":381},{},[382],{"type":23,"value":383},"执行以下命令为自定义下载路径`/mnt/data/Qwen3-Next-80B-A3B-Instruct` 添加白名单。",{"type":17,"tag":143,"props":385,"children":387},{"code":386},"export HUB_WHITE_LIST_PATHS=/mnt/data/Qwen3-Next-80B-A3B-Instruct\n",[388],{"type":17,"tag":148,"props":389,"children":390},{"__ignoreMap":7},[391],{"type":23,"value":386},{"type":17,"tag":25,"props":393,"children":394},{},[395],{"type":23,"value":396},"执行以下命令从魔乐社区下载Qwen3-Next-80B-A3B-Instruct 权重文件至指定路径`/mnt/data/Qwen3-Next-80B-A3B-Instruct` 。下载的文件包含模型代码、权重、分词模型和示例代码，占用约 152GB 的磁盘空间，请预留足够空间。",{"type":17,"tag":143,"props":398,"children":399},{"code":159},[400],{"type":17,"tag":148,"props":401,"children":402},{"__ignoreMap":7},[403],{"type":23,"value":159},{"type":17,"tag":143,"props":405,"children":407},{"code":406},"python\nfrom openmind_hub import snapshot_download\nsnapshot_download(\n   repo_id=\"MindSpore-Lab/Qwen3-Next-80B-A3B-Instruct\",\n   local_dir=\"/mnt/data/Qwen3-Next-80B-A3B-Instruct\",\n   local_dir_use_symlinks=False\n)\nexit()\n",[408],{"type":17,"tag":148,"props":409,"children":410},{"__ignoreMap":7},[411],{"type":23,"value":406},{"type":17,"tag":25,"props":413,"children":414},{},[415],{"type":17,"tag":89,"props":416,"children":417},{},[418],{"type":23,"value":183},{"type":17,"tag":25,"props":420,"children":421},{},[422],{"type":23,"value":423},"Qwen3-Next-80B-A3B-Instruct推理需要1台（8卡）Atlas 800T/800I 
A2（64G）服务器（基于BF16权重）。昇思MindSpore提供了Qwen3-Next-80B-A3B-Instruct推理可用的Docker容器镜像，供开发者快速体验。",{"type":17,"tag":25,"props":425,"children":426},{},[427],{"type":17,"tag":89,"props":428,"children":429},{},[430],{"type":23,"value":196},{"type":17,"tag":143,"props":432,"children":433},{"code":199},[434],{"type":17,"tag":148,"props":435,"children":436},{"__ignoreMap":7},[437],{"type":23,"value":199},{"type":17,"tag":25,"props":439,"children":440},{},[441],{"type":17,"tag":89,"props":442,"children":443},{},[444],{"type":23,"value":212},{"type":17,"tag":25,"props":446,"children":447},{},[448],{"type":23,"value":217},{"type":17,"tag":143,"props":450,"children":451},{"code":220},[452],{"type":17,"tag":148,"props":453,"children":454},{"__ignoreMap":7},[455],{"type":23,"value":220},{"type":17,"tag":25,"props":457,"children":458},{},[459],{"type":17,"tag":89,"props":460,"children":461},{},[462],{"type":23,"value":233},{"type":17,"tag":25,"props":464,"children":465},{},[466],{"type":23,"value":467},"执行以下命令创建并启动容器（/mnt/data/Qwen3-Next-80B-A3B-Instruct用于存放权重路径，若没有/mnt盘则要修改）。",{"type":17,"tag":143,"props":469,"children":471},{"code":470},"docker run -it \\\n--privileged \\\n--name=Qwen3-Next-80B-A3B-Instruct \\\n--net=host \\\n--cap-add=SYS_PTRACE \\\n--security-opt seccomp=unconfined \\\n--device=/dev/davinci0 \\\n--device=/dev/davinci1 \\\n--device=/dev/davinci2 \\\n--device=/dev/davinci3 \\\n--device=/dev/davinci4 \\\n--device=/dev/davinci5 \\\n--device=/dev/davinci6 \\\n--device=/dev/davinci7 \\\n--device=/dev/davinci_manager \\\n--device=/dev/hisi_hdc \\\n--device=/dev/devmm_svm \\\n--device=/dev/davinci_manager \\\n-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \\\n-v /usr/local/Ascend/driver/:/usr/local/Ascend/driver/ \\\n-v /usr/local/sbin:/usr/local/sbin \\\n-v /etc/hccn.conf:/etc/hccn.conf \\\n-v /mnt/data/Qwen3-Next-80B-A3B-Instruct/:/mnt/data/Qwen3-Next-80B-A3B-Instruct/ \\\nswr.cn-central-221.ovaijisuan.com/mindformers/qwen3-next-80b-a3b:20250911 
\\\n/bin/bash\n",[472],{"type":17,"tag":148,"props":473,"children":474},{"__ignoreMap":7},[475],{"type":23,"value":470},{"type":17,"tag":25,"props":477,"children":478},{},[479],{"type":17,"tag":89,"props":480,"children":481},{},[482],{"type":23,"value":254},{"type":17,"tag":256,"props":484,"children":485},{},[486],{"type":17,"tag":260,"props":487,"children":488},{},[489],{"type":23,"value":264},{"type":17,"tag":25,"props":491,"children":492},{},[493],{"type":17,"tag":89,"props":494,"children":495},{},[496],{"type":17,"tag":89,"props":497,"children":498},{},[499],{"type":23,"value":275},{"type":17,"tag":25,"props":501,"children":502},{},[503],{"type":17,"tag":89,"props":504,"children":505},{},[506],{"type":23,"value":283},{"type":17,"tag":25,"props":508,"children":509},{},[510],{"type":23,"value":511},"在服务器中都添加如下环境变量：",{"type":17,"tag":143,"props":513,"children":514},{"code":291},[515],{"type":17,"tag":148,"props":516,"children":517},{"__ignoreMap":7},[518],{"type":23,"value":291},{"type":17,"tag":25,"props":520,"children":521},{},[522],{"type":17,"tag":89,"props":523,"children":524},{},[525],{"type":23,"value":304},{"type":17,"tag":25,"props":527,"children":528},{},[529],{"type":23,"value":309},{"type":17,"tag":143,"props":531,"children":533},{"code":532},"python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model \"/mnt/data/Qwen3-Next-80B-A3B-Instruct\" --trust_remote_code --tensor_parallel_size=8 --max-num-seqs=192 --max_model_len=32768 --max-num-batched-tokens=16384 --block-size=32 --gpu-memory-utilization=0.9\n",[534],{"type":17,"tag":148,"props":535,"children":536},{"__ignoreMap":7},[537],{"type":23,"value":532},{"type":17,"tag":25,"props":539,"children":540},{},[541],{"type":17,"tag":89,"props":542,"children":543},{},[544],{"type":23,"value":325},{"type":17,"tag":25,"props":546,"children":547},{},[548],{"type":23,"value":330},{"type":17,"tag":143,"props":550,"children":552},{"code":551},"curl http://localhost:8000/v1/chat/completions -H 
\"Content-Type:\napplication/json\" -d '{\n  \"model\": \"/mnt/data/Qwen3-Next-80B-A3B-Instruct\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"介绍一下上海\"}\n  ],\n  \"temperature\": 0.6,\n  \"top_p\": 0.95,\n  \"top_k\": 20,\n  \"min_p\": 0,\n  \"max_tokens\": 128,\n  \"presence_penalty\": 1.05\n}'\n",[553],{"type":17,"tag":148,"props":554,"children":555},{"__ignoreMap":7},[556],{"type":23,"value":551},{"title":7,"searchDepth":558,"depth":558,"links":559},4,[],"markdown","content:technology-blogs:zh:3831.md","content","technology-blogs/zh/3831.md","technology-blogs/zh/3831","md",1776506135906]