[{"data":1,"prerenderedAt":616},["ShallowReactive",2],{"content-query-oY2vwxeo7u":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"date":10,"cover":11,"type":12,"body":13,"_type":610,"_id":611,"_source":612,"_file":613,"_stem":614,"_extension":615},"/news/zh/3806","zh",false,"","0day同步！昇思MindSpore开源社区上线智谱GLM-4.5与GLM-4.5-Air大模型","小时级完成模型无缝迁移","2025-07-30","https://obs-mindspore-file.obs.cn-north-4.myhuaweicloud.com/file/2025/08/01/8deff3c328fa413d9315964528faebab.png","news",{"type":14,"children":15,"toc":607},"root",[16,24,30,43,48,57,62,71,80,85,90,95,104,112,122,130,135,145,150,158,166,174,189,197,202,210,218,226,231,239,247,252,257,265,272,280,288,296,301,309,317,322,327,335,340,348,356,361,369,384,391,400,407,412,420,425,432,440,447,459,466,471,479,486,494,499,506,513,518,526,533,540,547,554,559,566,574,579,587,595,599],{"type":17,"tag":18,"props":19,"children":21},"element","h1",{"id":20},"_0day同步昇思mindspore开源社区上线智谱glm-45与glm-45-air大模型",[22],{"type":23,"value":8},"text",{"type":17,"tag":25,"props":26,"children":27},"p",{},[28],{"type":23,"value":29},"7月28日，智谱AI发布新一代旗舰模型 GLM-4.5，昇思MindSpore通过提供主流生态接口，依托MindSpore Transformers套件和MindSpore-vLLM插件，小时级完成模型无缝迁移，并将版本上传至开源社区，欢迎广大开发者下载体验！",{"type":17,"tag":25,"props":31,"children":32},{},[33,35],{"type":23,"value":34},"MindSpore 
Transformers代码仓：",{"type":17,"tag":36,"props":37,"children":41},"a",{"href":38,"rel":39},"https://gitee.com/mindspore/mindformers",[40],"nofollow",[42],{"type":23,"value":38},{"type":17,"tag":25,"props":44,"children":45},{},[46],{"type":23,"value":47},"MindSpore-vLLM代码仓：",{"type":17,"tag":25,"props":49,"children":50},{},[51],{"type":17,"tag":36,"props":52,"children":55},{"href":53,"rel":54},"https://gitee.com/mindspore/vllm-mindspore",[40],[56],{"type":23,"value":53},{"type":17,"tag":25,"props":58,"children":59},{},[60],{"type":23,"value":61},"链接地址：",{"type":17,"tag":25,"props":63,"children":64},{},[65],{"type":17,"tag":36,"props":66,"children":69},{"href":67,"rel":68},"https://modelers.cn/models/MindSpore-Lab/GLM-4.5",[40],[70],{"type":23,"value":67},{"type":17,"tag":25,"props":72,"children":73},{},[74],{"type":17,"tag":36,"props":75,"children":78},{"href":76,"rel":77},"https://modelers.cn/models/MindSpore-Lab/GLM-4.5-Air",[40],[79],{"type":23,"value":76},{"type":17,"tag":25,"props":81,"children":82},{},[83],{"type":23,"value":84},"衡量 AGI 的第一性原理，是在不损失原有能力的前提下融合更多通用智能能力，GLM-4.5 是对此理念的首次完整呈现，并有幸取得技术突破。GLM-4.5 首次在单个模型中实现将推理、编码和智能体能力原生融合，以满足智能体应用的复杂需求。",{"type":17,"tag":25,"props":86,"children":87},{},[88],{"type":23,"value":89},"为综合衡量模型的通用能力，智谱选择了最具有代表性的 12 个评测基准，包括 MMLU Pro、AIME 24、MATH 500、SciCode、GPQA、HLE、LiveCodeBench、SWE-Bench Verified、Terminal-Bench、TAU-Bench、BFCL v3 和 BrowseComp。综合平均分，GLM-4.5 取得了全球模型第三、国产模型第一，开源模型第一。",{"type":17,"tag":25,"props":91,"children":92},{},[93],{"type":23,"value":94},"GLM-4.5 和 GLM-4.5-Air 使用了相似的训练流程：首先在 15 万亿 token 的通用数据上进行了预训练，此后在代码、推理、智能体等领域的 8 万亿 token 数据上进行针对性训练，最后通过强化学习进一步增强模型推理、代码与智能体能力。",{"type":17,"tag":25,"props":96,"children":97},{},[98],{"type":17,"tag":99,"props":100,"children":101},"strong",{},[102],{"type":23,"value":103},"# 01 GLM-4.5 vllm-mindspore 
推理指南",{"type":17,"tag":25,"props":105,"children":106},{},[107],{"type":17,"tag":99,"props":108,"children":109},{},[110],{"type":23,"value":111},"1、下载链接",{"type":17,"tag":25,"props":113,"children":114},{},[115,117],{"type":23,"value":116},"魔乐社区：",{"type":17,"tag":36,"props":118,"children":120},{"href":67,"rel":119},[40],[121],{"type":23,"value":67},{"type":17,"tag":25,"props":123,"children":124},{},[125],{"type":17,"tag":99,"props":126,"children":127},{},[128],{"type":23,"value":129},"2、模型分别下载到2台服务器，存放路径保持一致。",{"type":17,"tag":25,"props":131,"children":132},{},[133],{"type":23,"value":134},"执行以下命令为2台服务器上的自定义下载路径 /mnt/data/GLM-4.5 添加白名单：",{"type":17,"tag":136,"props":137,"children":139},"pre",{"code":138},"export HUB_WHITE_LIST_PATHS=/mnt/data/GLM-4.5\n",[140],{"type":17,"tag":141,"props":142,"children":143},"code",{"__ignoreMap":7},[144],{"type":23,"value":138},{"type":17,"tag":25,"props":146,"children":147},{},[148],{"type":23,"value":149},"执行以下命令从魔乐社区下载GLM-4.5权重文件至指定路径 /mnt/data/GLM-4.5。2台服务器分别下载，都需要占用约 740GB 的磁盘空间：",{"type":17,"tag":136,"props":151,"children":153},{"code":152},"pip install openmind_hub\n",[154],{"type":17,"tag":141,"props":155,"children":156},{"__ignoreMap":7},[157],{"type":23,"value":152},{"type":17,"tag":136,"props":159,"children":161},{"code":160},"python\n\nfrom openmind_hub import snapshot_download\n\nsnapshot_download(\n    repo_id=\"MindSpore-Lab/GLM-4.5\",\n    local_dir=\"/mnt/data/GLM-4.5\",\n    local_dir_use_symlinks=False\n)\nexit()\n",[162],{"type":17,"tag":141,"props":163,"children":164},{"__ignoreMap":7},[165],{"type":23,"value":160},{"type":17,"tag":25,"props":167,"children":168},{},[169],{"type":17,"tag":99,"props":170,"children":171},{},[172],{"type":23,"value":173},"注意事项：",{"type":17,"tag":175,"props":176,"children":177},"ul",{},[178,184],{"type":17,"tag":179,"props":180,"children":181},"li",{},[182],{"type":23,"value":183},"/mnt/data/GLM-4.5 
可修改为自定义路径，确保2台服务器的该路径有足够的磁盘空间。",{"type":17,"tag":179,"props":185,"children":186},{},[187],{"type":23,"value":188},"下载时间可能因网络环境而异，建议在稳定的网络环境下操作。",{"type":17,"tag":25,"props":190,"children":191},{},[192],{"type":17,"tag":99,"props":193,"children":194},{},[195],{"type":23,"value":196},"3、快速开始",{"type":17,"tag":25,"props":198,"children":199},{},[200],{"type":23,"value":201},"GLM-4.5推理需要2台（16卡）Atlas 800T/800I A2（64G）服务器（基于BF16权重）。 两台服务器需要提前配置好组网环境，两台设备的卡与卡之间能够互相ping通。 昇思MindSpore提供了GLM-4.5推理可用的Docker容器镜像，供开发者快速体验。",{"type":17,"tag":25,"props":203,"children":204},{},[205],{"type":17,"tag":99,"props":206,"children":207},{},[208],{"type":23,"value":209},"3.1 2台服务器分别停止其他进程，避免服务器中其他进程影响",{"type":17,"tag":136,"props":211,"children":213},{"code":212},"pkill -9 python\npkill -9 mindie\npkill -9 ray\n",[214],{"type":17,"tag":141,"props":215,"children":216},{"__ignoreMap":7},[217],{"type":23,"value":212},{"type":17,"tag":25,"props":219,"children":220},{},[221],{"type":17,"tag":99,"props":222,"children":223},{},[224],{"type":23,"value":225},"3.2 2台服务器分别下载昇思 MindSpore 推理容器镜像",{"type":17,"tag":25,"props":227,"children":228},{},[229],{"type":23,"value":230},"执行以下 Shell 命令，拉取昇思 MindSpore GLM-4.5推理容器镜像：",{"type":17,"tag":136,"props":232,"children":234},{"code":233},"docker pull swr.cn-central-221.ovaijisuan.com/mindformers/glm4.5moe-infer:20250728\n",[235],{"type":17,"tag":141,"props":236,"children":237},{"__ignoreMap":7},[238],{"type":23,"value":233},{"type":17,"tag":25,"props":240,"children":241},{},[242],{"type":17,"tag":99,"props":243,"children":244},{},[245],{"type":23,"value":246},"3.3 启动容器",{"type":17,"tag":25,"props":248,"children":249},{},[250],{"type":23,"value":251},"2台服务器分别执行以下命令创建并启动容器（/mnt/data/GLM-4.5用于存放权重路径，若没有/mnt盘则要修改）。",{"type":17,"tag":25,"props":253,"children":254},{},[255],{"type":23,"value":256},"2台服务器执行命令的区别在于，hostname需要不同。",{"type":17,"tag":136,"props":258,"children":260},{"code":259},"docker run -it \\\n--privileged \\\n--name=GLM-4.5 \\\n--net=host 
\\\n--cap-add=SYS_PTRACE \\\n--security-opt seccomp=unconfined \\\n--hostname=worker2 \\\n--device=/dev/davinci0 \\\n--device=/dev/davinci1 \\\n--device=/dev/davinci2 \\\n--device=/dev/davinci3 \\\n--device=/dev/davinci4 \\\n--device=/dev/davinci5 \\\n--device=/dev/davinci6 \\\n--device=/dev/davinci7 \\\n--device=/dev/davinci_manager \\\n--device=/dev/hisi_hdc \\\n--device=/dev/devmm_svm \\\n--device=/dev/davinci_manager \\\n-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \\\n-v /usr/local/Ascend/driver/:/usr/local/Ascend/driver/ \\\n-v /usr/local/sbin:/usr/local/sbin \\\n-v /etc/hccn.conf:/etc/hccn.conf \\\n-v /mnt/data/GLM-4.5/:/mnt/data/GLM-4.5/ \\\nswr.cn-central-221.ovaijisuan.com/mindformers/glm4.5moe-infer:20250728 \\\n/bin/bash\n",[261],{"type":17,"tag":141,"props":262,"children":263},{"__ignoreMap":7},[264],{"type":23,"value":259},{"type":17,"tag":25,"props":266,"children":267},{},[268],{"type":17,"tag":99,"props":269,"children":270},{},[271],{"type":23,"value":173},{"type":17,"tag":175,"props":273,"children":274},{},[275],{"type":17,"tag":179,"props":276,"children":277},{},[278],{"type":23,"value":279},"后续操作，除了发起推理请求可以在容器外进行，其余操作均在容器内进行。",{"type":17,"tag":25,"props":281,"children":282},{},[283],{"type":17,"tag":99,"props":284,"children":285},{},[286],{"type":23,"value":287},"4、服务化部署",{"type":17,"tag":25,"props":289,"children":290},{},[291],{"type":17,"tag":99,"props":292,"children":293},{},[294],{"type":23,"value":295},"4.1 添加环境变量",{"type":17,"tag":25,"props":297,"children":298},{},[299],{"type":23,"value":300},"在2台服务器中都添加如下环境变量：",{"type":17,"tag":136,"props":302,"children":304},{"code":303},"export vLLM_MODEL_BACKEND=MindFormers\nexport ASCEND_TOTAL_MEMORY_GB=64\n",[305],{"type":17,"tag":141,"props":306,"children":307},{"__ignoreMap":7},[308],{"type":23,"value":303},{"type":17,"tag":25,"props":310,"children":311},{},[312],{"type":17,"tag":99,"props":313,"children":314},{},[315],{"type":23,"value":316},"4.2 
2台设备设置主机和辅机，通过ray进程关联",{"type":17,"tag":25,"props":318,"children":319},{},[320],{"type":23,"value":321},"选择一台设备作为主节点，依次执行 ray stop 和 ray start --head --port=6380 两条命令。",{"type":17,"tag":25,"props":323,"children":324},{},[325],{"type":23,"value":326},"另一台设备作为辅节点，依次执行 ray stop 和 ray start --address=主节点IP:6380 两条命令。",{"type":17,"tag":25,"props":328,"children":329},{},[330],{"type":17,"tag":99,"props":331,"children":332},{},[333],{"type":23,"value":334},"4.3 拉起服务",{"type":17,"tag":25,"props":336,"children":337},{},[338],{"type":23,"value":339},"在主节点容器中拉起服务。355b至少需要16张卡，所以tensor_parallel_size=16",{"type":17,"tag":136,"props":341,"children":343},{"code":342},"python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model \"/mnt/data/GLM-4.5\" --trust_remote_code --tensor_parallel_size=16 --max-num-seqs=192 --max_model_len=32768 --max-num-batched-tokens=16384 --block-size=32 --gpu-memory-utilization=0.93 --distributed-executor-backend=ray\n",[344],{"type":17,"tag":141,"props":345,"children":346},{"__ignoreMap":7},[347],{"type":23,"value":342},{"type":17,"tag":25,"props":349,"children":350},{},[351],{"type":17,"tag":99,"props":352,"children":353},{},[354],{"type":23,"value":355},"4.4 执行推理请求测试",{"type":17,"tag":25,"props":357,"children":358},{},[359],{"type":23,"value":360},"打开新的窗口，执行以下命令发送推理请求测试。",{"type":17,"tag":136,"props":362,"children":364},{"code":363},"curl http://localhost:8000/v1/chat/completions -H \"Content-Type: application/json\" -d '{\n  \"model\": \"/mnt/data/GLM-4.5\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"介绍一下北京\"}\n  ],\n  \"temperature\": 0.6,\n  \"top_p\": 0.95,\n  \"top_k\": 20,\n  \"min_p\": 0,\n  \"max_tokens\": 8192,\n  \"presence_penalty\": 1.05,\n  \"chat_template_kwargs\": {\"enable_thinking\": 
true}\n}'\n",[365],{"type":17,"tag":141,"props":366,"children":367},{"__ignoreMap":7},[368],{"type":23,"value":363},{"type":17,"tag":25,"props":370,"children":371},{},[372,377,379],{"type":17,"tag":99,"props":373,"children":374},{},[375],{"type":23,"value":376},"# 02",{"type":23,"value":378}," ",{"type":17,"tag":99,"props":380,"children":381},{},[382],{"type":23,"value":383},"GLM-4.5-Air vllm-mindspore 推理指南",{"type":17,"tag":25,"props":385,"children":386},{},[387],{"type":17,"tag":99,"props":388,"children":389},{},[390],{"type":23,"value":111},{"type":17,"tag":25,"props":392,"children":393},{},[394,395],{"type":23,"value":116},{"type":17,"tag":36,"props":396,"children":398},{"href":76,"rel":397},[40],[399],{"type":23,"value":76},{"type":17,"tag":25,"props":401,"children":402},{},[403],{"type":17,"tag":99,"props":404,"children":405},{},[406],{"type":23,"value":129},{"type":17,"tag":25,"props":408,"children":409},{},[410],{"type":23,"value":411},"执行以下命令为自定义下载路径 /mnt/data/GLM-4.5-Air 添加白名单：",{"type":17,"tag":136,"props":413,"children":415},{"code":414},"export HUB_WHITE_LIST_PATHS=/mnt/data/GLM-4.5-Air\n",[416],{"type":17,"tag":141,"props":417,"children":418},{"__ignoreMap":7},[419],{"type":23,"value":414},{"type":17,"tag":25,"props":421,"children":422},{},[423],{"type":23,"value":424},"执行以下命令从魔乐社区下载GLM-4.5-Air权重文件至指定路径 /mnt/data/GLM-4.5-Air 。2台服务器分别下载，都需要占用约 220GB 的磁盘空间：",{"type":17,"tag":136,"props":426,"children":427},{"code":152},[428],{"type":17,"tag":141,"props":429,"children":430},{"__ignoreMap":7},[431],{"type":23,"value":152},{"type":17,"tag":136,"props":433,"children":435},{"code":434},"python\n\nfrom openmind_hub import snapshot_download\n\nsnapshot_download(\n    repo_id=\"MindSpore-Lab/GLM-4.5-Air\",\n    local_dir=\"/mnt/data/GLM-4.5-Air\",\n    
local_dir_use_symlinks=False\n)\nexit()\n",[436],{"type":17,"tag":141,"props":437,"children":438},{"__ignoreMap":7},[439],{"type":23,"value":434},{"type":17,"tag":25,"props":441,"children":442},{},[443],{"type":17,"tag":99,"props":444,"children":445},{},[446],{"type":23,"value":173},{"type":17,"tag":175,"props":448,"children":449},{},[450,455],{"type":17,"tag":179,"props":451,"children":452},{},[453],{"type":23,"value":454},"/mnt/data/GLM-4.5-Air 可修改为自定义路径，确保该路径有足够的磁盘空间。",{"type":17,"tag":179,"props":456,"children":457},{},[458],{"type":23,"value":188},{"type":17,"tag":25,"props":460,"children":461},{},[462],{"type":17,"tag":99,"props":463,"children":464},{},[465],{"type":23,"value":196},{"type":17,"tag":25,"props":467,"children":468},{},[469],{"type":23,"value":470},"GLM-4.5-Air推理需要1台（8卡）Atlas 800T/800I A2（64G）服务器（基于BF16权重）。昇思MindSpore提供了GLM-4.5-Air推理可用的Docker容器镜像，供开发者快速体验。",{"type":17,"tag":25,"props":472,"children":473},{},[474],{"type":17,"tag":99,"props":475,"children":476},{},[477],{"type":23,"value":478},"3.1 停止其他进程，避免服务器中其他进程影响",{"type":17,"tag":136,"props":480,"children":481},{"code":212},[482],{"type":17,"tag":141,"props":483,"children":484},{"__ignoreMap":7},[485],{"type":23,"value":212},{"type":17,"tag":25,"props":487,"children":488},{},[489],{"type":17,"tag":99,"props":490,"children":491},{},[492],{"type":23,"value":493},"3.2 下载昇思 MindSpore 推理容器镜像",{"type":17,"tag":25,"props":495,"children":496},{},[497],{"type":23,"value":498},"执行以下 Shell 命令，拉取昇思 MindSpore 
GLM-4.5-Air推理容器镜像：",{"type":17,"tag":136,"props":500,"children":501},{"code":233},[502],{"type":17,"tag":141,"props":503,"children":504},{"__ignoreMap":7},[505],{"type":23,"value":233},{"type":17,"tag":25,"props":507,"children":508},{},[509],{"type":17,"tag":99,"props":510,"children":511},{},[512],{"type":23,"value":246},{"type":17,"tag":25,"props":514,"children":515},{},[516],{"type":23,"value":517},"执行以下命令创建并启动容器（/mnt/data/GLM-4.5-Air用于存放权重路径，若没有/mnt盘则要修改）：",{"type":17,"tag":136,"props":519,"children":521},{"code":520},"docker run -it \\\n--privileged \\\n--name=GLM-4.5-Air \\\n--net=host \\\n--cap-add=SYS_PTRACE \\\n--security-opt seccomp=unconfined \\\n--device=/dev/davinci0 \\\n--device=/dev/davinci1 \\\n--device=/dev/davinci2 \\\n--device=/dev/davinci3 \\\n--device=/dev/davinci4 \\\n--device=/dev/davinci5 \\\n--device=/dev/davinci6 \\\n--device=/dev/davinci7 \\\n--device=/dev/davinci_manager \\\n--device=/dev/hisi_hdc \\\n--device=/dev/devmm_svm \\\n--device=/dev/davinci_manager \\\n-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \\\n-v /usr/local/Ascend/driver/:/usr/local/Ascend/driver/ \\\n-v /usr/local/sbin:/usr/local/sbin \\\n-v /etc/hccn.conf:/etc/hccn.conf \\\n-v /mnt/data/GLM-4.5-Air/:/mnt/data/GLM-4.5-Air/ \\\nswr.cn-central-221.ovaijisuan.com/mindformers/glm4.5moe-infer:20250728 
\\\n/bin/bash\n",[522],{"type":17,"tag":141,"props":523,"children":524},{"__ignoreMap":7},[525],{"type":23,"value":520},{"type":17,"tag":25,"props":527,"children":528},{},[529],{"type":17,"tag":99,"props":530,"children":531},{},[532],{"type":23,"value":173},{"type":17,"tag":175,"props":534,"children":535},{},[536],{"type":17,"tag":179,"props":537,"children":538},{},[539],{"type":23,"value":279},{"type":17,"tag":25,"props":541,"children":542},{},[543],{"type":17,"tag":99,"props":544,"children":545},{},[546],{"type":23,"value":287},{"type":17,"tag":25,"props":548,"children":549},{},[550],{"type":17,"tag":99,"props":551,"children":552},{},[553],{"type":23,"value":295},{"type":17,"tag":25,"props":555,"children":556},{},[557],{"type":23,"value":558},"在服务器中都添加如下环境变量：",{"type":17,"tag":136,"props":560,"children":561},{"code":303},[562],{"type":17,"tag":141,"props":563,"children":564},{"__ignoreMap":7},[565],{"type":23,"value":303},{"type":17,"tag":25,"props":567,"children":568},{},[569],{"type":17,"tag":99,"props":570,"children":571},{},[572],{"type":23,"value":573},"4.2 拉起服务",{"type":17,"tag":25,"props":575,"children":576},{},[577],{"type":23,"value":578},"执行以下命令拉起服务：",{"type":17,"tag":136,"props":580,"children":582},{"code":581},"python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model \"/mnt/data/GLM-4.5-Air\" --trust_remote_code --tensor_parallel_size=8 --max-num-seqs=192 --max_model_len=32768 --max-num-batched-tokens=16384 --block-size=32 --gpu-memory-utilization=0.93\n",[583],{"type":17,"tag":141,"props":584,"children":585},{"__ignoreMap":7},[586],{"type":23,"value":581},{"type":17,"tag":25,"props":588,"children":589},{},[590],{"type":17,"tag":99,"props":591,"children":592},{},[593],{"type":23,"value":594},"4.3 执行推理请求测试",{"type":17,"tag":25,"props":596,"children":597},{},[598],{"type":23,"value":360},{"type":17,"tag":136,"props":600,"children":602},{"code":601},"curl http://localhost:8000/v1/chat/completions -H \"Content-Type: 
application/json\" -d '{\n  \"model\": \"/mnt/data/GLM-4.5-Air\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"介绍一下北京\"}\n  ],\n  \"temperature\": 0.6,\n  \"top_p\": 0.95,\n  \"top_k\": 20,\n  \"min_p\": 0,\n  \"max_tokens\": 8192,\n  \"presence_penalty\": 1.05,\n  \"chat_template_kwargs\": {\"enable_thinking\": true}\n}'\n",[603],{"type":17,"tag":141,"props":604,"children":605},{"__ignoreMap":7},[606],{"type":23,"value":601},{"title":7,"searchDepth":608,"depth":608,"links":609},4,[],"markdown","content:news:zh:3806.md","content","news/zh/3806.md","news/zh/3806","md",1776506089788]