[{"data":1,"prerenderedAt":261},["ShallowReactive",2],{"content-query-48v9WHyPYi":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"category":13,"body":14,"_type":255,"_id":256,"_source":257,"_file":258,"_stem":259,"_extension":260},"/technology-blogs/zh/3685","zh",false,"","快速支持DeepSeek-V3-0324，昇思MindSpore+vLLM服务化部署开源版本上线","近日，基于昇思 MindSpore AI 框架+vLLM的DeepSeek-V3-0324 模型完成适配并上线魔乐社区。","2025-04-07","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/04/11/b37a56f97930448a86230f32ea1eb12c.png","technology-blogs","实践",{"type":15,"children":16,"toc":245},"root",[17,25,30,35,48,66,71,76,86,91,99,104,112,121,129,134,142,147,155,164,172,177,185,193,198,206,211,219,224,232,237],{"type":18,"tag":19,"props":20,"children":22},"element","h1",{"id":21},"快速支持deepseek-v3-0324昇思mindsporevllm服务化部署开源版本上线",[23],{"type":24,"value":8},"text",{"type":18,"tag":26,"props":27,"children":28},"p",{},[29],{"type":24,"value":9},{"type":18,"tag":26,"props":31,"children":32},{},[33],{"type":24,"value":34},"作为开源AI框架，MindSpore 以其卓越的性能优化、灵活的模型开发能力和高效的分布式训练能力，为大规模模型训推部署提供全流程开源支持。通过如下步骤，即可快速体验昇思MindSpore版本的DeepSeek-V3-0324服务化。",{"type":18,"tag":26,"props":36,"children":37},{},[38,40],{"type":24,"value":39},"魔乐社区链接：",{"type":18,"tag":41,"props":42,"children":46},"a",{"href":43,"rel":44},"https://modelers.cn/models/MindSpore-Lab/DeepSeek-V3-0324",[45],"nofollow",[47],{"type":24,"value":43},{"type":18,"tag":49,"props":50,"children":52},"h3",{"id":51},"_01-环境搭建",[53,59,61],{"type":18,"tag":54,"props":55,"children":56},"strong",{},[57],{"type":24,"value":58},"# 01",{"type":24,"value":60}," ",{"type":18,"tag":54,"props":62,"children":63},{},[64],{"type":24,"value":65},"环境搭建",{"type":18,"tag":26,"props":67,"children":68},{},[69],{"type":24,"value":70},"环境准备：四台Atlas 800I A2 
(64G)，并配置好组网，四台设备的卡与卡之间能够互相ping通。",{"type":18,"tag":26,"props":72,"children":73},{},[74],{"type":24,"value":75},"四台设备分别拉取镜像：",{"type":18,"tag":77,"props":78,"children":80},"pre",{"code":79},"\ndocker pull hub.oepkgs.net/oedeploy/openeuler/aarch64/mindspore:20250326\n",[81],{"type":18,"tag":82,"props":83,"children":84},"code",{"__ignoreMap":7},[85],{"type":24,"value":79},{"type":18,"tag":26,"props":87,"children":88},{},[89],{"type":24,"value":90},"四台设备分别杀进程：",{"type":18,"tag":77,"props":92,"children":94},{"code":93},"\nps -ef | grep python | grep -v grep | awk '{print $2}' | xargs kill -9\nps -ef | grep ray | grep -v grep | awk '{print $2}' | xargs kill -9\n",[95],{"type":18,"tag":82,"props":96,"children":97},{"__ignoreMap":7},[98],{"type":24,"value":93},{"type":18,"tag":26,"props":100,"children":101},{},[102],{"type":24,"value":103},"四台设备分别启动容器，四台设备的hostname需要不同。/data/DeepSeek-V3-0324用于存放权重及yaml配置文件。",{"type":18,"tag":77,"props":105,"children":107},{"code":106},"docker run -it --name=DeepSeek_V3_0324 --ipc=host --network=host --privileged=true --hostname=worker3 \\\n        --device=/dev/davinci0 \\\n        --device=/dev/davinci1 \\\n        --device=/dev/davinci2 \\\n        --device=/dev/davinci3 \\\n        --device=/dev/davinci4 \\\n        --device=/dev/davinci5 \\\n        --device=/dev/davinci6 \\\n        --device=/dev/davinci7 \\\n        --device=/dev/davinci_manager \\\n        --device=/dev/devmm_svm \\\n        --device=/dev/hisi_hdc \\\n        -v /usr/local/sbin/:/usr/local/sbin/ \\\n        -v /var/log/npu/slog/:/var/log/npu/slog \\\n        -v /var/log/npu/profiling/:/var/log/npu/profiling \\\n        -v /var/log/npu/dump/:/var/log/npu/dump \\\n        -v /var/log/npu/:/usr/slog \\\n        -v /etc/hccn.conf:/etc/hccn.conf \\\n        -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \\\n        -v /usr/local/dcmi:/usr/local/dcmi \\\n        -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \\\n        -v 
/etc/ascend_install.info:/etc/ascend_install.info \\\n        -v /etc/vnpu.cfg:/etc/vnpu.cfg \\\n        -v /data:/data \\\n        --pids-limit 409600 \\\n        --shm-size=\"250g\" \\\n        834520af4bdd \\\n        /bin/bash\n",[108],{"type":18,"tag":82,"props":109,"children":110},{"__ignoreMap":7},[111],{"type":24,"value":106},{"type":18,"tag":49,"props":113,"children":115},{"id":114},"_02",[116],{"type":18,"tag":54,"props":117,"children":118},{},[119],{"type":24,"value":120},"# 02",{"type":18,"tag":49,"props":122,"children":124},{"id":123},"下载权重及yaml配置文件",[125],{"type":18,"tag":54,"props":126,"children":127},{},[128],{"type":24,"value":123},{"type":18,"tag":26,"props":130,"children":131},{},[132],{"type":24,"value":133},"魔乐社区权重及yaml配置文件下载：",{"type":18,"tag":77,"props":135,"children":137},{"code":136},"\nfrom openmind_hub import snapshot_download\n \nsnapshot_download(\n    repo_id=\"MindSpore-Lab/DeepSeek-V3-0324\",\n    local_dir=\"/data/DeepSeek-V3-0324\",\n    local_dir_use_symlink=False\n)\n",[138],{"type":18,"tag":82,"props":139,"children":140},{"__ignoreMap":7},[141],{"type":24,"value":136},{"type":18,"tag":26,"props":143,"children":144},{},[145],{"type":24,"value":146},"yaml配置文件修改：",{"type":18,"tag":77,"props":148,"children":150},{"code":149},"# 修改为模型权重路径 \nload_checkpoint: '/data/DeepSeek-V3-0324'\n \n# 修改为模型tokenizer.json文件所在路径\nvocab_file: '/data/DeepSeek-V3-0324/tokenizer.json'\n \n# 修改为模型tokenizer.json文件所在路径\ntokenizer_file: '/data/DeepSeek-V3-0324/tokenizer.json'\n",[151],{"type":18,"tag":82,"props":152,"children":153},{"__ignoreMap":7},[154],{"type":24,"value":149},{"type":18,"tag":49,"props":156,"children":158},{"id":157},"_03",[159],{"type":18,"tag":54,"props":160,"children":161},{},[162],{"type":24,"value":163},"# 
03",{"type":18,"tag":49,"props":165,"children":167},{"id":166},"启动推理服务",[168],{"type":18,"tag":54,"props":169,"children":170},{},[171],{"type":24,"value":166},{"type":18,"tag":26,"props":173,"children":174},{},[175],{"type":24,"value":176},"容器中添加环境变量：",{"type":18,"tag":77,"props":178,"children":180},{"code":179},"export MINDFORMERS_MODEL_CONFIG=/data/DeepSeek-V3-0324/predict_deepseek3_671b.yaml\nexport ASCEND_CUSTOM_PATH=$ASCEND_HOME_PATH/../\nexport vLLM_MODEL_BACKEND=MindFormers\nexport vLLM_MODEL_MEMORY_USE_GB=50\nexport ASCEND_TOTAL_MEMORY_GB=64\nexport MS_ENABLE_LCCL=off\nexport HCCL_OP_EXPANSION_MODE=AIV\nexport HCCL_SOCKET_IFNAME=enp189s0f0\nexport GLOO_SOCKET_IFNAME=enp189s0f0\nexport TP_SOCKET_IFNAME=enp189s0f0\nexport HCCL_CONNECT_TIMEOUT=3600\nexport ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7\n",[181],{"type":18,"tag":82,"props":182,"children":183},{"__ignoreMap":7},[184],{"type":24,"value":179},{"type":18,"tag":26,"props":186,"children":187},{},[188],{"type":18,"tag":54,"props":189,"children":190},{},[191],{"type":24,"value":192},"主机及辅机设置",{"type":18,"tag":26,"props":194,"children":195},{},[196],{"type":24,"value":197},"选择一台设备作为主节点，执行如下命令：",{"type":18,"tag":77,"props":199,"children":201},{"code":200},"ray stop\nray start --head --port=6380\n",[202],{"type":18,"tag":82,"props":203,"children":204},{"__ignoreMap":7},[205],{"type":24,"value":200},{"type":18,"tag":26,"props":207,"children":208},{},[209],{"type":24,"value":210},"其他三台设备作为辅节点，依次执行如下命令：",{"type":18,"tag":77,"props":212,"children":214},{"code":213},"\nray stop\nray start --address=主节点IP:6380\n",[215],{"type":18,"tag":82,"props":216,"children":217},{"__ignoreMap":7},[218],{"type":24,"value":213},{"type":18,"tag":26,"props":220,"children":221},{},[222],{"type":24,"value":223},"主节点容器中拉起服务：",{"type":18,"tag":77,"props":225,"children":227},{"code":226},"python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model \"模型权重路径\" --trust_remote_code --tensor_parallel_size=32 
--enable-prefix-caching --enable-chunked-prefill --max-num-seqs=256 --block-size=32 --max_model_len=70000 --max-num-batched-tokens=2048 --distributed-executor-backend=ray\n",[228],{"type":18,"tag":82,"props":229,"children":230},{"__ignoreMap":7},[231],{"type":24,"value":226},{"type":18,"tag":26,"props":233,"children":234},{},[235],{"type":24,"value":236},"发起推理服务请求：若在主节点上发起请求，请新开一个终端，请求地址中的IP可使用0.0.0.0或localhost。",{"type":18,"tag":77,"props":238,"children":240},{"code":239},"curl http://localhost:8000/v1/completions -H \"Content-Type: application/json\" -d '{\"model\": \"模型权重路径\", \"prompt\": \"请介绍下北京的top景点\", \"temperature\": 0, \"max_tokens\": 256, \"top_p\": 1.0, \"top_k\": 1, \"repetition_penalty\":1.0}'\n",[241],{"type":18,"tag":82,"props":242,"children":243},{"__ignoreMap":7},[244],{"type":24,"value":239},{"title":7,"searchDepth":246,"depth":246,"links":247},4,[248,251,252,253,254],{"id":51,"depth":249,"text":250},3,"# 01 环境搭建",{"id":114,"depth":249,"text":120},{"id":123,"depth":249,"text":123},{"id":157,"depth":249,"text":163},{"id":166,"depth":249,"text":166},"markdown","content:technology-blogs:zh:3685.md","content","technology-blogs/zh/3685.md","technology-blogs/zh/3685","md",1776506133216]