[{"data":1,"prerenderedAt":327},["ShallowReactive",2],{"content-query-3PxjN3ZRLe":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":321,"_id":322,"_source":323,"_file":324,"_stem":325,"_extension":326},"/technology-blogs/zh/3753","zh",false,"","0day同步！昇思MindSpore框架成功适配面壁MiniCPM4.0模型","6 月 6 号，面壁智能正式发布并开源了端侧「小钢炮」系列最新力作——MiniCPM 4.0 模型，实现了最快震撼的 220 倍极限加速！","2025-06-06","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/06/13/ca51743e8e31470a9d8d989f4b463985.png","technology-blogs","实践",{"type":15,"children":16,"toc":318},"root",[17,25,31,36,47,52,61,70,82,93,98,106,111,118,130,137,142,147,154,162,173,181,189,194,199,209,214,222,230,235,243,248,256,261,271,279,284,292,300,313],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"_0day同步昇思mindspore框架成功适配面壁minicpm40模型",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":24,"value":30},"6 月 6 号，面壁智能正式发布并开源了端侧「小钢炮」系列最新力作——MiniCPM 4.0 模型，实现了最快震撼的 220 倍极限加速！昇思MindSpore框架对齐主流生态接口，不断提升HuggingFace Transformers类模型接入vllm-mindspore框架效率，同时基于vLLM推理框架与MindSpore 
One套件，0day支持MiniCPM4.0双尺寸模型快速适配。欢迎广大开发者下载体验！",{"type":18,"tag":26,"props":32,"children":33},{},[34],{"type":24,"value":35},"MindSpore-vLLM代码仓：",{"type":18,"tag":26,"props":37,"children":38},{},[39],{"type":18,"tag":40,"props":41,"children":45},"a",{"href":42,"rel":43},"https://gitee.com/mindspore/vllm-mindspore",[44],"nofollow",[46],{"type":24,"value":42},{"type":18,"tag":26,"props":48,"children":49},{},[50],{"type":24,"value":51},"体验指导链接：",{"type":18,"tag":26,"props":53,"children":54},{},[55],{"type":18,"tag":40,"props":56,"children":59},{"href":57,"rel":58},"https://modelers.cn/models/MindSpore-Lab/MiniCPM4-8B",[44],[60],{"type":24,"value":57},{"type":18,"tag":26,"props":62,"children":63},{},[64],{"type":18,"tag":40,"props":65,"children":68},{"href":66,"rel":67},"https://modelers.cn/models/MindSpore-Lab/MiniCPM4-0.5B",[44],[69],{"type":24,"value":66},{"type":18,"tag":26,"props":71,"children":72},{},[73],{"type":18,"tag":74,"props":75,"children":76},"strong",{},[77],{"type":18,"tag":74,"props":78,"children":79},{},[80],{"type":24,"value":81},"# 01",{"type":18,"tag":26,"props":83,"children":84},{},[85],{"type":18,"tag":74,"props":86,"children":87},{},[88],{"type":18,"tag":74,"props":89,"children":90},{},[91],{"type":24,"value":92},"模型介绍",{"type":18,"tag":26,"props":94,"children":95},{},[96],{"type":24,"value":97},"MiniCPM 4-8B 「闪电版」，为新一代稀疏高效架构模型，通过架构层、算法层、系统层、数据层等多维度的技术创新，MiniCPM 4.0 相较于 Qwen-3-8B、Llama-3-8B、GLM-4-9B等同等参数规模端侧模型，实现了长文本推理速度 5 倍稳定加速以及最高 70 倍加速，并实现了同级最佳的模型性能，持续卫冕全球最强端侧模型。继 DeepSeek 之后，成为大模型领域又一次「标杆式创新」典范。",{"type":18,"tag":26,"props":99,"children":100},{},[101],{"type":18,"tag":102,"props":103,"children":105},"img",{"alt":7,"src":104},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/06/13/ff25e094e65a409f9d624a4f7599eb12.png",[],{"type":18,"tag":26,"props":107,"children":108},{},[109],{"type":24,"value":110},"MiniCPM 4.0 推出端侧性能“大小王”组合，拥有 8B、0.5B 两种参数规模，均实现了同级最佳的模型性能。其中，MiniCPM 4.0-8B 
模型为稀疏注意力模型，在MMLU、CEval、MATH500、HumanEval等基准测试中，MiniCPM 4.0-8B 性能超越 Qwen-3-8B、Gemma-3-12B。MiniCPM 4.0-0.5B 在性能上，也显著优于 Qwen-3-0.6B，并实现了最快 600 Token/s 的极速推理速度，成为速度最快的轻量级模型。",{"type":18,"tag":26,"props":112,"children":113},{},[114],{"type":18,"tag":102,"props":115,"children":117},{"alt":7,"src":116},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/06/13/c4683fc95cc945d6bb201e1b156d8e86.png",[],{"type":18,"tag":26,"props":119,"children":120},{},[121,123,128],{"type":24,"value":122},"针对单一架构难以兼顾长、短文本不同场景的技术难题，MiniCPM 4.0-8B 采用「高效双频换挡」机制，能够根据任务特征自动切换注意力模式：",{"type":18,"tag":74,"props":124,"children":125},{},[126],{"type":24,"value":127},"在处理高难度的长文本、深度思考任务时，启用稀疏注意力以降低计算复杂度，在短文本场景下切换至稠密注意力以确保精度",{"type":24,"value":129},"，实现了长、短文本切换的高效响应。",{"type":18,"tag":26,"props":131,"children":132},{},[133],{"type":18,"tag":102,"props":134,"children":136},{"alt":7,"src":135},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/06/13/326bee4923054455be49efd320d81c92.png",[],{"type":18,"tag":26,"props":138,"children":139},{},[140],{"type":24,"value":141},"值得一提的是，MiniCPM 4.0 实现了长文本缓存的大幅锐减。在 128K 长文本场景下，MiniCPM 4.0-8B 相较于 Qwen3-8B 仅需 1/4 的缓存存储空间。",{"type":18,"tag":26,"props":143,"children":144},{},[145],{"type":24,"value":146},"此外，MiniCPM 4.0 可在 vLLM、SGLang、llama.cpp、LlamaFactory、XTuner等开源框架部署；同时加强了对 MCP的支持，且性能超过同尺寸开源模型（Qwen-3-8B），进一步拓展了模型开发、应用潜力。",{"type":18,"tag":26,"props":148,"children":149},{},[150],{"type":18,"tag":102,"props":151,"children":153},{"alt":7,"src":152},"https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/06/13/fabf31698b764a1995a32d94cc182db4.png",[],{"type":18,"tag":26,"props":155,"children":156},{},[157],{"type":18,"tag":74,"props":158,"children":159},{},[160],{"type":24,"value":161},"# 
02",{"type":18,"tag":26,"props":163,"children":164},{},[165],{"type":18,"tag":74,"props":166,"children":167},{},[168],{"type":18,"tag":74,"props":169,"children":170},{},[171],{"type":24,"value":172},"快速开始",{"type":18,"tag":26,"props":174,"children":175},{},[176],{"type":18,"tag":74,"props":177,"children":178},{},[179],{"type":24,"value":180},"1",{"type":18,"tag":26,"props":182,"children":183},{},[184],{"type":18,"tag":74,"props":185,"children":186},{},[187],{"type":24,"value":188},"环境搭建",{"type":18,"tag":26,"props":190,"children":191},{},[192],{"type":24,"value":193},"环境准备：Atlas 800I/800T A2 (64G)，单卡。",{"type":18,"tag":26,"props":195,"children":196},{},[197],{"type":24,"value":198},"执行以下 Shell 命令，拉取MindSpore MiniCPM 推理容器镜像：",{"type":18,"tag":200,"props":201,"children":203},"pre",{"code":202},"\ndocker pull swr.cn-central-221.ovaijisuan.com/mindsporelab/minicpm:v4\n",[204],{"type":18,"tag":205,"props":206,"children":207},"code",{"__ignoreMap":7},[208],{"type":24,"value":202},{"type":18,"tag":26,"props":210,"children":211},{},[212],{"type":24,"value":213},"2",{"type":18,"tag":26,"props":215,"children":216},{},[217],{"type":18,"tag":74,"props":218,"children":219},{},[220],{"type":24,"value":221},"通过魔乐社区下载权重",{"type":18,"tag":200,"props":223,"children":225},{"code":224},"pip install openmind_hub\nexport HUB_WHITE_LIST_PATHS=/mnt/data/MiniCPM8B   # 该路径存放权重\n\nfrom openmind_hub import snapshot_download\nsnapshot_download(\n    repo_id=\"MindSpore-Lab/MiniCPM4-8B\",\n    local_dir=\"/mnt/data/MiniCPM8B\",\n    
local_dir_use_symlinks=False\n)\n",[226],{"type":18,"tag":205,"props":227,"children":228},{"__ignoreMap":7},[229],{"type":24,"value":224},{"type":18,"tag":26,"props":231,"children":232},{},[233],{"type":24,"value":234},"3",{"type":18,"tag":26,"props":236,"children":237},{},[238],{"type":18,"tag":74,"props":239,"children":240},{},[241],{"type":24,"value":242},"创建并进入容器",{"type":18,"tag":26,"props":244,"children":245},{},[246],{"type":24,"value":247},"执行以下命令创建容器，name 设置为 MiniCPM8B。",{"type":18,"tag":200,"props":249,"children":251},{"code":250},"docker run -it --privileged  --name=MiniCPM8B --net=host \\\n   --shm-size 500g \\\n   --device=/dev/davinci0 \\\n   --device=/dev/davinci1 \\\n   --device=/dev/davinci2 \\\n   --device=/dev/davinci3 \\\n   --device=/dev/davinci4 \\\n   --device=/dev/davinci5 \\\n   --device=/dev/davinci6 \\\n   --device=/dev/davinci7 \\\n   --device=/dev/davinci_manager \\\n   --device=/dev/hisi_hdc \\\n   --device /dev/devmm_svm \\\n   -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \\\n   -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \\\n   -v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \\\n   -v /usr/local/sbin:/usr/local/sbin \\\n   -v /etc/hccn.conf:/etc/hccn.conf \\\n   -v /mnt/data/MiniCPM8B/:/mnt/data/MiniCPM8B/  \\\n   swr.cn-central-221.ovaijisuan.com/mindsporelab/minicpm:v4 \\\n   bash\n",[252],{"type":18,"tag":205,"props":253,"children":254},{"__ignoreMap":7},[255],{"type":24,"value":250},{"type":18,"tag":26,"props":257,"children":258},{},[259],{"type":24,"value":260},"4",{"type":18,"tag":26,"props":262,"children":263},{},[264,269],{"type":18,"tag":74,"props":265,"children":266},{},[267],{"type":24,"value":268},"通过",{"type":24,"value":270},"vllm_mindspore拉起推理服务",{"type":18,"tag":200,"props":272,"children":274},{"code":273},"python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model \"/mnt/data/MiniCPM8B\" \\\n --trust_remote_code --tensor_parallel_size=1 --max-num-seqs=256 --block-size=32 
--max_model_len=8192 --max-num-batched-tokens=8192\n",[275],{"type":18,"tag":205,"props":276,"children":277},{"__ignoreMap":7},[278],{"type":24,"value":273},{"type":18,"tag":26,"props":280,"children":281},{},[282],{"type":24,"value":283},"5",{"type":18,"tag":26,"props":285,"children":286},{},[287],{"type":18,"tag":74,"props":288,"children":289},{},[290],{"type":24,"value":291},"打开一个新的窗口，然后发起推理请求",{"type":18,"tag":200,"props":293,"children":295},{"code":294},"\ncurl http://localhost:8000/v1/chat/completions -H \"Content-Type: application/json\" -d '{\n  \"model\": \"/mnt/data/MiniCPM8B\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"帮我介绍一下上海\"}\n  ],\n  \"temperature\": 0.0,\n  \"top_p\": 0.95,\n  \"top_k\": 20,\n  \"min_p\": 0,\n  \"max_tokens\": 4096\n}'\n",[296],{"type":18,"tag":205,"props":297,"children":298},{"__ignoreMap":7},[299],{"type":24,"value":294},{"type":18,"tag":26,"props":301,"children":302},{},[303,305,311],{"type":24,"value":304},"本文档提供的模型代码、权重文件和部署镜像，仅限于基于昇思MindSpore AI框架体验MiniCPM4.0的部署效果，如需生产使用，欢迎邮件联系我们。（",{"type":18,"tag":40,"props":306,"children":308},{"href":307},"mailto:contact@public.mindspore.cn",[309],{"type":24,"value":310},"contact@public.mindspore.cn",{"type":24,"value":312},"）",{"type":18,"tag":26,"props":314,"children":315},{},[316],{"type":24,"value":317},"昇思MindSpore AI框架将持续支持更多业界主流模型，欢迎大家试用并提供宝贵的反馈意见。",{"title":7,"searchDepth":319,"depth":319,"links":320},4,[],"markdown","content:technology-blogs:zh:3753.md","content","technology-blogs/zh/3753.md","technology-blogs/zh/3753","md",1776506134654]